diff --git a/mpeg4/bench.mk b/mpeg4/bench.mk new file mode 100644 index 0000000000000000000000000000000000000000..22f693c8e4d36c2e660d856405e97331da009af1 --- /dev/null +++ b/mpeg4/bench.mk @@ -0,0 +1,134 @@ +#### mpeg4 #### +DIR:=$(call my-dir) + +# Which file we measure the size +get_which_size=mpeg4_bench + +# Running commands +run_cmd=${PERFLAB_PATH}/mpeg4_bench -s 1280x720 -i ${PERFLAB_INPUT}/dance_100frame.avi -benchmark -y ${PERFLAB_INPUT}/mpeg4_output/dec_dance_frame_%03d.ppm; ${PERFLAB_PATH}/mpeg4_bench -i ${PERFLAB_INPUT}/mpeg4_output/dec_dance_frame_%03d.ppm -r 30 -s 1280x720 -sameq -g 9 -bf 3 -vcodec mpeg4 -benchmark -an -y ${PERFLAB_INPUT}/enc_dance_output.avi + +ffmpeg_cflags := -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE +ffmpeg_includes := src/ src/libavutil src/libavcodec src/libavformat + +##################################### +include $(BUILD)/clear.mk +TARGET := libavutil.a +##################################### + +target_srcs := mathematics.c rational.c intfloat_readwrite.c crc.c +target_prefix := libavutil + +target_local_includes := src/ src/libavutil +target_local_cflags := -DHAVE_AV_CONFIG_H -DBUILD_AVUTIL $(ffmpeg_cflags) + +include $(BUILD)/build_library.mk + +##################################### +include $(BUILD)/clear.mk +TARGET := libavcodec.a +##################################### + +target_srcs := bitstream.c utils.c mem.c allcodecs.c \ + mpegvideo.c jrevdct.c jfdctfst.c jfdctint.c\ + mpegaudio.c ac3enc.c mjpeg.c resample.c resample2.c dsputil.c \ + motion_est.c imgconvert.c imgresample.c \ + mpeg12.c mpegaudiodec.c pcm.c simple_idct.c \ + ratecontrol.c adpcm.c eval.c error_resilience.c \ + fft.c mdct.c raw.c golomb.c cabac.c\ + dpcm.c adx.c faandct.c parser.c g726.c \ + vp3dsp.c h264idct.c rangecoder.c pnm.c h263.c msmpeg4.c h263dec.c \ + opt.c + +target_srcs += aasc.c alac.c asv1.c avs.c cinepak.c cook.c cljr.c cyuv.c \ + dvbsubdec.c dvbsub.c dvdsub.c dvdsubenc.c dv.c 8bps.c ffv1.c \ + flac.c flicvideo.c 4xm.c fraps.c 
h261.c h264.c huffyuv.c \ + idcinvideo.c indeo2.c indeo3.c interplayvideo.c kmvc.c lcl.c \ + loco.c mace.c msrle.c msvideo1.c png.c qdm2.c qdrw.c qpeg.c \ + qtrle.c ra144.c ra288.c roqvideo.c rpza.c rv10.c shorten.c \ + smacker.c smc.c snow.c sonic.c svq1.c truemotion1.c truemotion2.c \ + truespeech.c tta.c tscc.c cscd.c lzo.c nuv.c rtjpeg.c \ + ulti.c vc9.c vcr1.c vmdav.c vorbis.c vp3.c vqavideo.c wmadec.c \ + wnv1.c ws-snd1.c xan.c xl.c bmp.c mmvideo.c zmbv.c + + +target_prefix := libavcodec + +target_local_includes := src/ src/libavutil \ + $(android_root)/external/zlib + +target_local_cflags := -DHAVE_AV_CONFIG_H +target_local_android_shared_libs := + +include $(BUILD)/build_library.mk + +##################################### +include $(BUILD)/clear.mk +TARGET := libavformat.a +##################################### + +target_srcs := utils.c cutils.c os_support.c allformats.c \ + mpeg.c mpegts.c mpegtsenc.c ffm.c crc.c img.c img2.c raw.c rm.c \ + avienc.c avidec.c wav.c mmf.c swf.c au.c gif.c mov.c mpjpeg.c dv.c \ + yuv4mpeg.c 4xm.c flvdec.c psxstr.c idroq.c ipmovie.c \ + nut.c wc3movie.c mp3.c westwood.c segafilm.c idcin.c flic.c \ + sierravmd.c matroska.c sol.c electronicarts.c nsvdec.c asf.c \ + ogg2.c oggparsevorbis.c oggparsetheora.c oggparseflac.c daud.c aiff.c \ + voc.c tta.c mm.c avs.c smacker.c nuv.c + +# muxers +target_srcs += flvenc.c movenc.c asf-enc.c adtsenc.c + +# image formats +target_srcs += pnm.c yuv.c png.c jpeg.c gifdec.c sgi.c framehook.c + +# CONFIG_VEDEO4LINUX +target_srcs += grab.c + +# CONFIG_VIDEO4LINUX2 +target_srcs += v4l2.c + +# CONFIG_DV1394 +target_srcs += dv1394.c + +# CONFIG_AUDIO_OSS +target_srcs += audio.c + +# protocols I/O +target_srcs += avio.c aviobuf.c + +# CONFIG_PROTOCOLS +target_srcs += file.c + +# CONFIG_NETWORK +target_srcs += udp.c tcp.c http.c rtsp.c rtp.c rtpproto.c + +target_prefix := libavformat +target_local_cflags := -DHAVE_AV_CONFIG_H $(ffmpeg_cflags) +target_local_includes := $(ffmpeg_includes) \ + 
$(android_root)/external/zlib + +target_local_static_libs := libavutil libavcodec + +include $(BUILD)/build_library.mk + +##################################### +include $(BUILD)/clear.mk +TARGET := mpeg4_bench +##################################### + +target_srcs := ffmpeg.c cmdutils.c + +target_prefix := +target_local_cflags := $(ffmpeg_cflags) + +target_local_includes := src/ \ + src/libavutil \ + src/libavcodec \ + src/libavformat + +target_local_android_static_libs := +target_local_android_shared_libs := libz libdl +target_local_static_libs := libavformat libavcodec libavutil +target_local_shared_libs := + +include $(BUILD)/build_executable.mk diff --git a/mpeg4/data/dance_100frame.avi b/mpeg4/data/dance_100frame.avi new file mode 100644 index 0000000000000000000000000000000000000000..801a204eee581b9b957df95bf4908cb574c8b44a Binary files /dev/null and b/mpeg4/data/dance_100frame.avi differ diff --git a/mpeg4/src/COPYING b/mpeg4/src/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..1e09914471c2484860e654df65a0f9799e632cc0 --- /dev/null +++ b/mpeg4/src/COPYING @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/mpeg4/src/CREDITS b/mpeg4/src/CREDITS new file mode 100644 index 0000000000000000000000000000000000000000..b937f7a34be0d15b895b5819684a8140e3b6639a --- /dev/null +++ b/mpeg4/src/CREDITS @@ -0,0 +1,45 @@ +This file contains the name of the people who have contributed to +FFmpeg. The names are sorted alphabetically by last name. 
+ +Michel Bardiaux +Fabrice Bellard +Patrice Bensoussan +Alex Beregszaszi +BERO +Mario Brito +Ronald Bultje +Maarten Daniels +Reimar Doeffinger +Tim Ferguson +Brian Foley +Arpad Gereoffy +Philip Gladstone +Vladimir Gneushev +Roine Gustafsson +David Hammerton +Wolfgang Hesseler +Falk Hueffner +Steven Johnson +Zdenek Kabelac +Robin Kay +Todd Kirby +Nick Kurshev +Benjamin Larsson +Loïc Le Loarer +Daniel Maas +Mike Melanson +Loren Merritt +Jeff Muizelaar +Michael Niedermayer +François Revol +Peter Ross +Måns Rullgård +Roman Shaposhnik +Dieter Shirley +Konstantin Shishkov +Juan J. Sierralta +Ewald Snel +Sascha Sommer +Leon van Stuivenberg +Roberto Togni +Lionel Ulmer diff --git a/mpeg4/src/Changelog b/mpeg4/src/Changelog new file mode 100644 index 0000000000000000000000000000000000000000..b9ef5efcd6cd1fef8201210c3465bff82cc8e1f3 --- /dev/null +++ b/mpeg4/src/Changelog @@ -0,0 +1,368 @@ +version +- DV50 AKA DVCPRO50 encoder, decoder, muxer and demuxer +- TechSmith Camtasia (TSCC) video decoder +- IBM Ultimotion (ULTI) video decoder +- Sierra Online audio file demuxer and decoder +- Apple QuickDraw (qdrw) video decoder +- Creative ADPCM audio decoder (16 bits as well as 8 bits schemes) +- Electronic Arts Multimedia (WVE/UV2/etc.) 
file demuxer +- Miro VideoXL (VIXL) video decoder +- H.261 video encoder +- QPEG video decoder +- Nullsoft Video (NSV) file demuxer +- Shorten audio decoder +- LOCO video decoder +- Apple Lossless Audio Codec (ALAC) decoder +- Winnov WNV1 video decoder +- Autodesk Animator Studio Codec (AASC) decoder +- Indeo 2 video decoder +- Fraps FPS1 video decoder +- Snow video encoder/decoder +- Sonic audio encoder/decoder +- Vorbis audio decoder +- Macromedia ADPCM decoder +- Duck TrueMotion 2 video decoder +- support for decoding FLX and DTA extensions in FLIC files +- H.264 custom quantization matrices support +- ffserver fixed, it should now be usable again +- QDM2 audio decoder +- Real Cooker audio decoder +- TrueSpeech audio decoder +- WMA2 audio decoder fixed, now all files should play correctly +- RealAudio 14.4 and 28.8 decoders fixed +- JPEG-LS encoder and decoder +- CamStudio video decoder +- build system improvements +- tabs and trailing whitespace removed from the codebase +- AIFF/AIFF-C audio format, encoding and decoding +- ADTS AAC file reading and writing +- Creative VOC file reading and writing +- American Laser Games multimedia (*.mm) playback system +- Zip Blocks Motion Video decoder +- Improved Theora/VP3 decoder +- True Audio (TTA) decoder +- AVS demuxer and video decoder +- Smacker demuxer and decoder +- NuppelVideo/MythTV demuxer and RTjpeg decoder +- KMVC decoder + +version 0.4.9-pre1: + +- DV encoder, DV muxer +- Microsoft RLE video decoder +- Microsoft Video-1 decoder +- Apple Animation (RLE) decoder +- Apple Graphics (SMC) decoder +- Apple Video (RPZA) decoder +- Cinepak decoder +- Sega FILM (CPK) file demuxer +- Westwood multimedia support (VQA & AUD files) +- Id Quake II CIN playback support +- 8BPS video decoder +- FLIC playback support +- RealVideo 2.0 (RV20) decoder +- Duck TrueMotion v1 (DUCK) video decoder +- Sierra VMD demuxer and video decoder +- MSZH and ZLIB decoder support +- SVQ1 video encoder +- AMR-WB support +- PPC optimizations +- 
rate distortion optimal cbp support +- rate distorted optimal ac prediction for MPEG-4 +- rate distorted optimal lambda->qp support +- AAC encoding with libfaac +- Sunplus JPEG codec (SP5X) support +- use Lagrange multiplier instead of QP for ratecontrol +- Theora/VP3 decoding support +- XA and ADX ADPCM codecs +- export MPEG-2 active display area / pan scan +- Add support for configuring with IBM XLC +- floating point AAN DCT +- initial support for zygo video (not complete) +- RGB ffv1 support +- new audio/video parser API +- av_log() system +- av_read_frame() and av_seek_frame() support +- missing last frame fixes +- seek by mouse in ffplay +- noise reduction of DCT coefficients +- H.263 OBMC & 4MV support +- H.263 alternative inter vlc support +- H.263 loop filter +- H.263 slice structured mode +- interlaced DCT support for MPEG-2 encoding +- stuffing to stay above min_bitrate +- MB type & QP visualization +- frame stepping for ffplay +- interlaced motion estimation +- alternate scantable support +- SVCD scan offset support +- closed GOP support +- SSE2 FDCT +- quantizer noise shaping +- G.726 ADPCM audio codec +- MS ADPCM encoding +- multithreaded/SMP motion estimation +- multithreaded/SMP encoding for MPEG-1/MPEG-2/MPEG-4/H.263 +- multithreaded/SMP decoding for MPEG-2 +- FLAC decoder +- Metrowerks CodeWarrior support +- H.263+ custom pcf support +- nicer output for 'ffmpeg -formats' +- Matroska demuxer +- SGI image format, encoding and decoding +- H.264 loop filter support +- H.264 CABAC support +- nicer looking arrows for the motion vector visualization +- improved VCD support +- audio timestamp drift compensation +- MPEG-2 YUV 422/444 support +- polyphase kaiser windowed sinc and blackman nuttall windowed sinc audio resample +- better image scaling +- H.261 support +- correctly interleave packets during encoding +- VIS optimized motion compensation +- intra_dc_precision>0 encoding support +- support reuse of motion vectors/MB types/field select values of 
the source video +- more accurate deblock filter +- padding support +- many optimizations and bugfixes + +version 0.4.8: + +- MPEG-2 video encoding (Michael) +- Id RoQ playback subsystem (Mike Melanson and Tim Ferguson) +- Wing Commander III Movie (.mve) file playback subsystem (Mike Melanson + and Mario Brito) +- Xan DPCM audio decoder (Mario Brito) +- Interplay MVE playback subsystem (Mike Melanson) +- Duck DK3 and DK4 ADPCM audio decoders (Mike Melanson) + +version 0.4.7: + +- RealAudio 1.0 (14_4) and 2.0 (28_8) native decoders. Author unknown, code from mplayerhq + (originally from public domain player for Amiga at http://www.honeypot.net/audio) +- current version now also compiles with older GCC (Fabrice) +- 4X multimedia playback system including 4xm file demuxer (Mike + Melanson), and 4X video and audio codecs (Michael) +- Creative YUV (CYUV) decoder (Mike Melanson) +- FFV1 codec (our very simple lossless intra only codec, compresses much better + than HuffYUV) (Michael) +- ASV1 (Asus), H.264, Intel indeo3 codecs have been added (various) +- tiny PNG encoder and decoder, tiny GIF decoder, PAM decoder (PPM with + alpha support), JPEG YUV colorspace support. 
(Fabrice Bellard) +- ffplay has been replaced with a newer version which uses SDL (optionally) + for multiplatform support (Fabrice) +- Sorenson Version 3 codec (SVQ3) support has been added (decoding only) - donated + by anonymous +- AMR format has been added (Johannes Carlsson) +- 3GP support has been added (Johannes Carlsson) +- VP3 codec has been added (Mike Melanson) +- more MPEG-1/2 fixes +- better multiplatform support, MS Visual Studio fixes (various) +- AltiVec optimizations (Magnus Damn and others) +- SH4 processor support has been added (BERO) +- new public interfaces (avcodec_get_pix_fmt) (Roman Shaposhnick) +- VOB streaming support (Brian Foley) +- better MP3 autodetection (Andriy Rysin) +- qpel encoding (Michael) +- 4mv+b frames encoding finally fixed (Michael) +- chroma ME (Michael) +- 5 comparison functions for ME (Michael) +- B-frame encoding speedup (Michael) +- WMV2 codec (unfinished - Michael) +- user specified diamond size for EPZS (Michael) +- Playstation STR playback subsystem, still experimental (Mike and Michael) +- ASV2 codec (Michael) +- CLJR decoder (Alex) + +.. And lots more new enhancements and fixes. + +version 0.4.6: + +- completely new integer only MPEG audio layer 1/2/3 decoder rewritten + from scratch +- Recoded DCT and motion vector search with gcc (no longer depends on nasm) +- fix quantization bug in AC3 encoder +- added PCM codecs and format. Corrected WAV/AVI/ASF PCM issues +- added prototype ffplay program +- added GOB header parsing on H.263/H.263+ decoder (Juanjo) +- bug fix on MCBPC tables of H.263 (Juanjo) +- bug fix on DC coefficients of H.263 (Juanjo) +- added Advanced Prediction Mode on H.263/H.263+ decoder (Juanjo) +- now we can decode H.263 streams found in QuickTime files (Juanjo) +- now we can decode H.263 streams found in VIVO v1 files(Juanjo) +- preliminary RTP "friendly" mode for H.263/H.263+ coding. 
(Juanjo) +- added GOB header for H.263/H.263+ coding on RTP mode (Juanjo) +- now H.263 picture size is returned on the first decoded frame (Juanjo) +- added first regression tests +- added MPEG-2 TS demuxer +- new demux API for libav +- more accurate and faster IDCT (Michael) +- faster and entropy-controlled motion search (Michael) +- two pass video encoding (Michael) +- new video rate control (Michael) +- added MSMPEG4V1, MSMPEGV2 and WMV1 support (Michael) +- great performance improvement of video encoders and decoders (Michael) +- new and faster bit readers and vlc parsers (Michael) +- high quality encoding mode: tries all macroblock/VLC types (Michael) +- added DV video decoder +- preliminary RTP/RTSP support in ffserver and libavformat +- H.263+ AIC decoding/encoding support (Juanjo) +- VCD MPEG-PS mode (Juanjo) +- PSNR stuff (Juanjo) +- simple stats output (Juanjo) +- 16-bit and 15-bit RGB/BGR/GBR support (Bisqwit) + +version 0.4.5: + +- some header fixes (Zdenek Kabelac ) +- many MMX optimizations (Nick Kurshev ) +- added configure system (actually a small shell script) +- added MPEG audio layer 1/2/3 decoding using LGPL'ed mpglib by + Michael Hipp (temporary solution - waiting for integer only + decoder) +- fixed VIDIOCSYNC interrupt +- added Intel H.263 decoding support ('I263' AVI fourCC) +- added Real Video 1.0 decoding (needs further testing) +- simplified image formats again. Added PGM format (=grey + pgm). Renamed old PGM to PGMYUV. +- fixed msmpeg4 slice issues (tell me if you still find problems) +- fixed OpenDivX bugs with newer versions (added VOL header decoding) +- added support for MPlayer interface +- added macroblock skip optimization +- added MJPEG decoder +- added mmx/mmxext IDCT from libmpeg2 +- added pgmyuvpipe, ppm, and ppm_pipe formats (original patch by Celer + ) +- added pixel format conversion layer (e.g. 
for MJPEG or PPM) +- added deinterlacing option +- MPEG-1/2 fixes +- MPEG-4 vol header fixes (Jonathan Marsden ) +- ARM optimizations (Lionel Ulmer ). +- Windows porting of file converter +- added MJPEG raw format (input/output) +- added JPEG image format support (input/output) + +version 0.4.4: + +- fixed some std header definitions (Bjorn Lindgren + ). +- added MPEG demuxer (MPEG-1 and 2 compatible). +- added ASF demuxer +- added prototype RM demuxer +- added AC3 decoding (done with libac3 by Aaron Holtzman) +- added decoding codec parameter guessing (e.g. for MPEG, because the + header does not include them) +- fixed header generation in MPEG-1, AVI and ASF muxer: wmplayer can now + play them (only tested video) +- fixed H.263 white bug +- fixed phase rounding in img resample filter +- add MMX code for polyphase img resample filter +- added CPU autodetection +- added generic title/author/copyright/comment string handling (ASF and RM + use them) +- added SWF demux to extract MP3 track (not usable yet because no MP3 + decoder) +- added fractional frame rate support +- codecs are no longer searched by read_header() (should fix ffserver + segfault) + +version 0.4.3: + +- BGR24 patch (initial patch by Jeroen Vreeken ) +- fixed raw yuv output +- added motion rounding support in MPEG-4 +- fixed motion bug rounding in MSMPEG4 +- added B-frame handling in video core +- added full MPEG-1 decoding support +- added partial (frame only) MPEG-2 support +- changed the FOURCC code for H.263 to "U263" to be able to see the + +AVI/H.263 file with the UB Video H.263+ decoder. MPlayer works with + this +codec ;) (JuanJo). +- Halfpel motion estimation after MB type selection (JuanJo) +- added pgm and .Y.U.V output format +- suppressed 'img:' protocol. Simply use: /tmp/test%d.[pgm|Y] as input or + output. 
+- added pgmpipe I/O format (original patch from Martin Aumueller + , but changed completely since we use a format + instead of a protocol) + +version 0.4.2: + +- added H.263/MPEG-4/MSMPEG4 decoding support. MPEG-4 decoding support + (for OpenDivX) is almost complete: 8x8 MVs and rounding are + missing. MSMPEG4 support is complete. +- added prototype MPEG-1 decoder. Only I- and P-frames handled yet (it + can decode ffmpeg MPEGs :-)). +- added libavcodec API documentation (see apiexample.c). +- fixed image polyphase bug (the bottom of some images could be + greenish) +- added support for non clipped motion vectors (decoding only) + and image sizes non-multiple of 16 +- added support for AC prediction (decoding only) +- added file overwrite confirmation (can be disabled with -y) +- added custom size picture to H.263 using H.263+ (Juanjo) + +version 0.4.1: + +- added MSMPEG4 (aka DivX) compatible encoder. Changed default codec + of AVI and ASF to DIV3. +- added -me option to set motion estimation method + (default=log). suppressed redundant -hq option. +- added options -acodec and -vcodec to force a given codec (useful for + AVI for example) +- fixed -an option +- improved dct_quantize speed +- factorized some motion estimation code + +version 0.4.0: + +- removing grab code from ffserver and moved it to ffmpeg. Added + multistream support to ffmpeg. +- added timeshifting support for live feeds (option ?date=xxx in the + URL) +- added high quality image resize code with polyphase filter (need + mmx/see optimisation). Enable multiple image size support in ffserver. +- added multi live feed support in ffserver +- suppressed master feature from ffserver (it should be done with an + external program which opens the .ffm url and writes it to another + ffserver) +- added preliminary support for video stream parsing (WAV and AVI half + done). Added proper support for audio/video file conversion in + ffmpeg. 
+- added preliminary support for video file sending from ffserver +- redesigning I/O subsystem: now using URL based input and output + (see avio.h) +- added WAV format support +- added "tty user interface" to ffmpeg to stop grabbing gracefully +- added MMX/SSE optimizations to SAD (Sum of Absolute Differences) + (Juan J. Sierralta P. a.k.a. "Juanjo" ) +- added MMX DCT from mpeg2_movie 1.5 (Juanjo) +- added new motion estimation algorithms, log and phods (Juanjo) +- changed directories: libav for format handling, libavcodec for + codecs + +version 0.3.4: + +- added stereo in MPEG audio encoder + +version 0.3.3: + +- added 'high quality' mode which use motion vectors. It can be used in + real time at low resolution. +- fixed rounding problems which caused quality problems at high + bitrates and large GOP size + +version 0.3.2: small fixes + +- ASF fixes +- put_seek bug fix + +version 0.3.1: added avi/divx support + +- added AVI support +- added MPEG-4 codec compatible with OpenDivX. It is based on the H.263 codec +- added sound for flash format (not tested) + +version 0.3: initial public release diff --git a/mpeg4/src/Doxyfile b/mpeg4/src/Doxyfile new file mode 100644 index 0000000000000000000000000000000000000000..73b4e992fd40a6ac135f9d679f5d3da1e402fd7a --- /dev/null +++ b/mpeg4/src/Doxyfile @@ -0,0 +1,1038 @@ +# Doxyfile 1.3-rc1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# General configuration options +#--------------------------------------------------------------------------- + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = ffmpeg + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doxy + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, +# Finnish, French, German, Greek, Hungarian, Italian, Japanese, Japanese-en +# (Japanese with english messages), Korean, Norwegian, Polish, Portuguese, +# Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish and Ukrainian. + +OUTPUT_LANGUAGE = English + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. 
+ +EXTRACT_PRIVATE = YES + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these class will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. 
+ +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all inherited +# members of a class in the documentation of that class as if those members were +# ordinary class members. Constructors, destructors and assignment operators of +# the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. It is allowed to use relative paths in the argument list. + +STRIP_FROM_PATH = . + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower case letters. If set to YES upper case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. 
Windows +# users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like the Qt-style comments (thus requiring an +# explicit @brief command for a brief description). + +JAVADOC_AUTOBRIEF = YES + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. 
+ +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# reimplements. + +INHERIT_DOCS = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. 
+ +GENERATE_DEPRECATEDLIST= YES + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consist of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources +# only. Doxygen will then generate output that is more tailored for C. +# For instance some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java sources +# only. Doxygen will then generate output that is more tailored for Java. +# For instance namespaces will be presented as packages, qualified scopes +# will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. 
If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. 
+ +INPUT = + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx *.hpp +# *.h++ *.idl *.odl + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or directories +# that are symbolic links (a Unix filesystem feature) are excluded from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. + +EXCLUDE_PATTERNS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. 
If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. + +INPUT_FILTER = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES (the default) +# then for each documented function all documented +# functions referencing it will be listed. 
+ +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES (the default) +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). 
If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output dir. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non empty doxygen will try to run +# the html help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). 
+ +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the Html help documentation and to the tree view. + +TOC_EXPAND = NO + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be +# generated containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript and frames is required (for instance Mozilla, Netscape 4.0+, +# or Internet explorer 4.0+). Note that for large projects the tree generation +# can take a very long time. In such cases it is better to disable this feature. +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = NO + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. 
+ +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output. +# The RTF output is optimised for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_XML = NO + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_DTD = + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. 
+ +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = YES + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. 
+ +PREDEFINED = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \ + HAVE_BUILTIN_VECTOR HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_3DNOW \ + ATTR_ALIGN(x)="" + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +#EXPAND_AS_DEFINED = FF_COMMON_FRAME +EXPAND_AS_DEFINED = declare_idct(idct, table, idct_row_head, idct_row, idct_row_tail, idct_row_mid) + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse the +# parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES tag can be used to specify one or more tagfiles. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). 
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in Html, RTF and LaTeX) for classes with base or +# super classes. Setting the tag to NO turns the diagrams off. Note that this +# option is superseded by the HAVE_DOT option below. This is only a fallback. It is +# recommended to install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. 
+ +TEMPLATE_RELATIONS = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif. +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found on the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_WIDTH = 1024 + +# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height +# (in pixels) of the graphs generated by dot. If a graph becomes larger than +# this value, doxygen will try to truncate the graph, so that it fits within +# the specified constraint. 
Beware that most browsers cannot cope with very +# large images. + +MAX_DOT_GRAPH_HEIGHT = 1024 + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- + +# The SEARCHENGINE tag specifies whether or not a search engine should be +# used. If set to NO the values of all tags below this one will be ignored. + +SEARCHENGINE = NO + +# The CGI_NAME tag should be the name of the CGI script that +# starts the search engine (doxysearch) with the correct parameters. +# A script with this name will be generated by doxygen. + +CGI_NAME = search.cgi + +# The CGI_URL tag should be the absolute URL to the directory where the +# cgi binaries are located. See the documentation of your http daemon for +# details. + +CGI_URL = + +# The DOC_URL tag should be the absolute URL to the directory where the +# documentation is located. If left blank the absolute path to the +# documentation, with file:// prepended to it, will be used. + +DOC_URL = + +# The DOC_ABSPATH tag should be the absolute path to the directory where the +# documentation is located. If left blank the directory on the local machine +# will be used. + +DOC_ABSPATH = + +# The BIN_ABSPATH tag must point to the directory where the doxysearch binary +# is installed. + +BIN_ABSPATH = /usr/local/bin/ + +# The EXT_DOC_PATHS tag can be used to specify one or more paths to +# documentation generated for other projects. 
This allows doxysearch to search +# the documentation for these projects as well. + +EXT_DOC_PATHS = diff --git a/mpeg4/src/INSTALL b/mpeg4/src/INSTALL new file mode 100644 index 0000000000000000000000000000000000000000..a636c5367ab13331385ac8e51b3fc4d17972f06e --- /dev/null +++ b/mpeg4/src/INSTALL @@ -0,0 +1,14 @@ + +1) Type './configure' to create the configuration (use './configure +--help' to see the configure options). + +'configure' can be launched from another directory than the ffmpeg +sources to put the objects at that place. In that case, use an +absolute path when launching 'configure', +e.g. /ffmpegdir/ffmpeg/configure. + +2) Then type 'make' to build ffmpeg. On BSD systems, type 'gmake' +instead of 'make'. You may need to install GNU make first. + +3) Type 'make install' to install ffmpeg and ffserver in +/usr/local/bin. diff --git a/mpeg4/src/MAINTAINERS b/mpeg4/src/MAINTAINERS new file mode 100644 index 0000000000000000000000000000000000000000..373c65aac086605cc7778344feb43063d89549b0 --- /dev/null +++ b/mpeg4/src/MAINTAINERS @@ -0,0 +1,220 @@ +FFmpeg maintainers +================== + +Below is a list of the people maintaining different parts of the +FFmpeg code. 
+ + +Project Leader +============== + +Michael Niedermayer + final design decisions + + +Applications +============ + +ffmpeg: + ffmpeg.c Michael Niedermayer + + Video Hooks: + vhook + vhook/watermark.c Marcus Engene + vhook/ppm.c + vhook/drawtext.c + vhook/fish.c + vhook/null.c + vhook/imlib2.c + +ffplay: + ffplay.c + +ffserver: + ffserver.c, ffserver.h + +Commandline utility code: + cmdutils.c, cmdutils.h + +QuickTime faststart: + qt-faststart.c Mike Melanson + + +Miscellaneous Areas +=================== + +documentation Mike Melanson, Diego Biurrun +website Mike Melanson, Diego Biurrun +build system (configure,Makefiles) Diego Biurrun +project server Diego Biurrun, Mans Rullgard + + +libavutil +========= + +External Interfaces: + libavutil/avutil.h +Internal Interfaces: + libavutil/common.h + +Other: + intfloat* Michael Niedermayer + rational.c, rational.h Michael Niedermayer + mathematics.c, mathematics.h Michael Niedermayer + integer.c, integer.h Michael Niedermayer + bswap.h + + +libavcodec +========== + +Generic Parts: + External Interfaces: + avcodec.h Michael Niedermayer + utility code: + utils.c + mem.c + opt.c, opt.h + arithmetic expression evaluator: + eval.c Michael Niedermayer + audio and video frame extraction: + parser.c + bitstream reading: + bitstream.c, bitstream.h Michael Niedermayer + CABAC: + cabac.h, cabac.c Michael Niedermayer + DSP utilities: + dsputils.c, dsputils.h Michael Niedermayer + entropy coding: + rangecoder.c, rangecoder.h Michael Niedermayer + floating point AAN DCT: + faandct.c, faandct.h Michael Niedermayer + Golomb coding: + golomb.c, golomb.h Michael Niedermayer + motion estimation: + motion* Michael Niedermayer + rate control: + ratecontrol.c + xvid_rc.c Michael Niedermayer + simple IDCT: + simple_idct.c, simple_idct.h Michael Niedermayer + postprocessing: + libpostproc/* Michael Niedermayer + +Codecs: + 4xm.c Michael Niedermayer + 8bps.c Roberto Togni + aasc.c Kostya Shishkov + asv* Michael Niedermayer + bmp.c Mans Rullgard 
+ cinepak.c Roberto Togni + cljr Alex Beregszaszi + cook.c, cookdata.h Benjamin Larsson + cscd.c Reimar Doeffinger + dpcm.c Mike Melanson + dv.c Roman Shaposhnik + ffv1.c Michael Niedermayer + flac.c Alex Beregszaszi + flicvideo.c Mike Melanson + g726.c Roman Shaposhnik + h264* Loren Merritt, Michael Niedermayer + h261* Michael Niedermayer + h263* Michael Niedermayer + huffyuv.c Michael Niedermayer + idcinvideo.c Mike Melanson + indeo2* Kostya Shishkov + interplayvideo.c Mike Melanson + jpeg_ls.c Kostya Shishkov + kmvc.c Kostya Shishkov + lcl.c Roberto Togni + loco.c Kostya Shishkov + lzo.h, lzo.c Reimar Doeffinger + mdec.c Michael Niedermayer + mjpeg.c Michael Niedermayer + mpeg12.c, mpeg12data.h Michael Niedermayer + mpegvideo.c, mpegvideo.h Michael Niedermayer + msmpeg4.c, msmpeg4data.h Michael Niedermayer + msrle.c Mike Melanson + msvideo1.c Mike Melanson + nuv.c Reimar Doeffinger + oggtheora.c Mans Rullgard + qdm2.c, qdm2data.h Roberto Togni + qdrw.c Kostya Shishkov + qpeg.c Kostya Shishkov + qtrle.c Mike Melanson + ra144.c, ra144.h, ra288.c, ra288.h Roberto Togni + resample2.c Michael Niedermayer + rpza.c Roberto Togni + rtjpeg.c, rtjpeg.h Reimar Doeffinger + rv10.c Michael Niedermayer + smc.c Mike Melanson + snow.c Michael Niedermayer, Loren Merritt + sonic.c Alex Beregszaszi + svq3.c Michael Niedermayer + truemotion1* Mike Melanson + truemotion2* Kostya Shishkov + truespeech.c Kostya Shishkov + tscc.c Kostya Shishkov + ulti* Kostya Shishkov + vcr1.c Michael Niedermayer + vp3* Mike Melanson + vqavideo.c Mike Melanson + wmv2.c Michael Niedermayer + wnv1.c Kostya Shishkov + x264.c Mans Rullgard + xan.c Mike Melanson + xl.c Kostya Shishkov + xvmcvideo.c Ivan Kalvachev + zmbv.c Kostya Shishkov + + +libavformat +=========== + +Generic parts: + External Interface: + libavcodec/avcodec.h + Utility Code: + libavcodec/utils.c + + +Muxers/Demuxers: + 4xm.c Mike Melanson + adtsenc.c Mans Rullgard + avi* Michael Niedermayer + crc.c Michael Niedermayer + daud.c Reimar 
Doeffinger + dc1394.c, dv.c Roman Shaposhnik + flic.c Mike Melanson + flvdec.c, flvenc.c Michael Niedermayer + idcin.c Mike Melanson + idroq.c Mike Melanson + ipmovie.c Mike Melanson + img2.c Michael Niedermayer + mov.c Francois Revol, Michael Niedermayer + mpegts* Mans Rullgard + nsvdec.c Francois Revol + nut.c Alex Beregszaszi + nuv.c Reimar Doeffinger + ogg2.c, ogg2.h Mans Rullgard + oggparsevorbis.c Mans Rullgard + psxstr.c Mike Melanson + raw.c Michael Niedermayer + rm.c Roberto Togni + segafilm.c Mike Melanson + v4l2.c Luca Abeni + voc.c Aurelien Jacobs + wav.c Michael Niedermayer + wc3movie.c Mike Melanson + westwood.c Mike Melanson + + +Operating systems / CPU architectures +===================================== + +Alpha Mans Rullgard, Falk Hueffner +BeOS Francois Revol +i386 Michael Niedermayer +Mac OS X / PowerPC Romain Dolbeau +Amiga / PowerPC Colin Ward +Linux / PowerPC Luca Barbato diff --git a/mpeg4/src/Makefile b/mpeg4/src/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..4e91630c4b195bdaa81368faeb1a5793cbeed1f5 --- /dev/null +++ b/mpeg4/src/Makefile @@ -0,0 +1,217 @@ +# +# Main ffmpeg Makefile +# (c) 2000-2004 Fabrice Bellard +# +include config.mak + +VPATH=$(SRC_PATH) + +CFLAGS=$(OPTFLAGS) -I. 
-I$(SRC_PATH) -I$(SRC_PATH)/libavutil -I$(SRC_PATH)/libavcodec -I$(SRC_PATH)/libavformat -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE +LDFLAGS+= -g + +ifeq ($(TARGET_GPROF),yes) +CFLAGS+=-p +LDFLAGS+=-p +endif + +MANPAGE=doc/ffmpeg.1 +PROG_G+=ffmpeg_g$(EXESUF) +PROG+=ffmpeg$(EXESUF) +PROGTEST=output_example$(EXESUF) +QTFASTSTART=qt-faststart$(EXESUF) + +ifeq ($(CONFIG_FFSERVER),yes) +MANPAGE+=doc/ffserver.1 +PROG+=ffserver$(EXESUF) +endif + +ifeq ($(CONFIG_FFPLAY),yes) +MANPAGE+=doc/ffplay.1 +PROG_G+=ffplay_g$(EXESUF) +PROG+=ffplay$(EXESUF) +FFPLAY_O=ffplay.o +endif + +ifeq ($(CONFIG_AUDIO_BEOS),yes) +EXTRALIBS+=-lmedia -lbe +endif + +ifeq ($(BUILD_SHARED),yes) +DEP_LIBS=libavcodec/$(SLIBPREF)avcodec$(SLIBSUF) libavformat/$(SLIBPREF)avformat$(SLIBSUF) +else +DEP_LIBS=libavcodec/$(LIBPREF)avcodec$(LIBSUF) libavformat/$(LIBPREF)avformat$(LIBSUF) +endif + +ifeq ($(BUILD_VHOOK),yes) +VHOOK=videohook +INSTALLVHOOK=install-vhook +endif + +ifeq ($(TARGET_OS), SunOS) +TEST=/usr/bin/test +else +TEST=test +endif + +ifeq ($(BUILD_DOC),yes) +DOC=documentation +endif + +OBJS = ffmpeg.o ffserver.o cmdutils.o $(FFPLAY_O) +SRCS = $(OBJS:.o=.c) $(ASM_OBJS:.o=.s) +FFLIBS = -L./libavformat -lavformat$(BUILDSUF) -L./libavcodec -lavcodec$(BUILDSUF) -L./libavutil -lavutil$(BUILDSUF) + +all: lib $(PROG_G) $(PROG) $(PROGTEST) $(VHOOK) $(QTFASTSTART) $(DOC) + +lib: + $(MAKE) -C libavutil all + $(MAKE) -C libavcodec all + $(MAKE) -C libavformat all +ifeq ($(CONFIG_PP),yes) + $(MAKE) -C libavcodec/libpostproc all +endif + +ffmpeg_g$(EXESUF): ffmpeg.o cmdutils.o .libs + $(CC) $(LDFLAGS) -o $@ ffmpeg.o cmdutils.o $(FFLIBS) $(EXTRALIBS) + +ffmpeg$(EXESUF): ffmpeg_g$(EXESUF) + cp -p $< $@ + $(STRIP) $@ + +ffserver$(EXESUF): ffserver.o .libs + $(CC) $(LDFLAGS) $(FFSLDFLAGS) -o $@ ffserver.o $(FFLIBS) $(EXTRALIBS) + +ffplay_g$(EXESUF): ffplay.o cmdutils.o .libs + $(CC) $(LDFLAGS) -o $@ ffplay.o cmdutils.o $(FFLIBS) $(EXTRALIBS) $(SDL_LIBS) + +ffplay$(EXESUF): ffplay_g$(EXESUF) + cp -p 
$< $@ + $(STRIP) $@ + +output_example$(EXESUF): output_example.o .libs + $(CC) $(LDFLAGS) -o $@ output_example.o $(FFLIBS) $(EXTRALIBS) + +qt-faststart$(EXESUF): qt-faststart.c + $(CC) $(CFLAGS) $(SRC_PATH)/qt-faststart.c -o qt-faststart$(EXESUF) + +cws2fws$(EXESUF): cws2fws.c + $(CC) $(SRC_PATH)/cws2fws.c -o cws2fws$(EXESUF) -lz + +ffplay.o: ffplay.c + $(CC) $(CFLAGS) $(SDL_CFLAGS) -c -o $@ $< + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< + +videohook: .libs + $(MAKE) -C vhook all + +documentation: + $(MAKE) -C doc all + +.PHONY: install + +install: install-progs install-libs install-headers install-man $(INSTALLVHOOK) + +ifeq ($(BUILD_SHARED),yes) +install-progs: $(PROG) install-libs +else +install-progs: $(PROG) +endif + install -d "$(bindir)" + install -c $(INSTALLSTRIP) -m 755 $(PROG) "$(bindir)" + +# create the window installer +wininstaller: all install + makensis ffinstall.nsi + +# install man from source dir if available +install-man: +ifneq ($(CONFIG_WIN32),yes) + if [ -f doc/ffmpeg.1 ] ; then \ + install -d "$(mandir)/man1" ; \ + install -m 644 $(MANPAGE) "$(mandir)/man1" ; \ + fi +endif + +install-vhook: + $(MAKE) -C vhook install + +install-libs: + $(MAKE) -C libavutil install-libs + $(MAKE) -C libavcodec install-libs + $(MAKE) -C libavformat install-libs +ifeq ($(CONFIG_PP),yes) + $(MAKE) -C libavcodec/libpostproc install-libs +endif +ifeq ($(BUILD_SHARED),yes) + -$(LDCONFIG) +endif + +install-headers: + $(MAKE) -C libavutil install-headers + $(MAKE) -C libavcodec install-headers + $(MAKE) -C libavformat install-headers +ifeq ($(CONFIG_PP),yes) + $(MAKE) -C libavcodec/libpostproc install-headers +endif + +dep: depend + +depend: .depend + $(MAKE) -C libavcodec depend + $(MAKE) -C libavformat depend +ifeq ($(BUILD_VHOOK),yes) + $(MAKE) -C vhook depend +endif + +.depend: $(SRCS) + $(CC) -MM $(CFLAGS) $(SDL_CFLAGS) $^ 1>.depend + +.libs: lib + @test -f .libs || touch .libs + @for i in $(DEP_LIBS) ; do if $(TEST) $$i -nt .libs ; then touch .libs; fi ; done + 
+clean: + $(MAKE) -C libavutil clean + $(MAKE) -C libavcodec clean + $(MAKE) -C libavformat clean + $(MAKE) -C libavcodec/libpostproc clean + $(MAKE) -C tests clean + $(MAKE) -C vhook clean + rm -f *.o *.d *~ .libs gmon.out TAGS \ + $(PROG) $(PROG_G) $(PROGTEST) $(QTFASTSTART) + +# Note well: config.log is NOT removed. +distclean: clean + $(MAKE) -C libavutil distclean + $(MAKE) -C libavcodec distclean + $(MAKE) -C libavformat distclean + $(MAKE) -C libavcodec/libpostproc distclean + $(MAKE) -C tests distclean + $(MAKE) -C vhook distclean + rm -f .depend config.mak config.h *.pc + +TAGS: + etags *.[ch] libavformat/*.[ch] libavcodec/*.[ch] + +# regression tests + +libavtest test mpeg4 mpeg test-server fulltest: $(PROG) + $(MAKE) -C tests $@ + +# tar release (use 'make -k tar' on a checkouted tree) +FILE=ffmpeg-$(shell grep "\#define FFMPEG_VERSION " libavcodec/avcodec.h | \ + cut -d "\"" -f 2 ) + +tar: + rm -rf /tmp/$(FILE) + cp -r . /tmp/$(FILE) + ( cd /tmp ; tar zcvf ~/$(FILE).tar.gz $(FILE) --exclude CVS ) + rm -rf /tmp/$(FILE) + +.PHONY: lib + +ifneq ($(wildcard .depend),) +include .depend +endif diff --git a/mpeg4/src/README b/mpeg4/src/README new file mode 100644 index 0000000000000000000000000000000000000000..ce3e0d6922013fdb0a56781f0167b47da6bb81e4 --- /dev/null +++ b/mpeg4/src/README @@ -0,0 +1,19 @@ +FFmpeg README +------------- + +1) Documentation +---------------- + +* Read the documentation in the doc/ directory. + +2) Licensing +------------ + +* Read the file COPYING. ffmpeg and the associated libraries EXCEPT + liba52 and libpostproc are licensed under the Lesser GNU General + Public License. + +* liba52 and libpostproc are distributed under the GNU General Public + License and their compilation and use is optional in ffmpeg. + +Fabrice Bellard. 
\ No newline at end of file diff --git a/mpeg4/src/berrno.h b/mpeg4/src/berrno.h new file mode 100644 index 0000000000000000000000000000000000000000..eb3bd0cd41ece500bab271255abbac3a0af9c204 --- /dev/null +++ b/mpeg4/src/berrno.h @@ -0,0 +1,44 @@ +#ifndef BERRNO_H +#define BERRNO_H + +#include <Errors.h> + +// mmu_man: this is needed for http.c (defined errno) +#include <errno.h> + +#ifdef ENOENT +#undef ENOENT +#endif +#define ENOENT 2 + +#ifdef EINTR +#undef EINTR +#endif +#define EINTR 4 + +#ifdef EIO +#undef EIO +#endif +#define EIO 5 + +#ifdef EAGAIN +#undef EAGAIN +#endif +#define EAGAIN 11 + +#ifdef ENOMEM +#undef ENOMEM +#endif +#define ENOMEM 12 + +#ifdef EINVAL +#undef EINVAL +#endif +#define EINVAL 22 + +#ifdef EPIPE +#undef EPIPE +#endif +#define EPIPE 32 + +#endif /* BERRNO_H */ diff --git a/mpeg4/src/build_avopt b/mpeg4/src/build_avopt new file mode 100755 index 0000000000000000000000000000000000000000..fcf165765c174497fdafca4a8c7814b58d5a91b4 --- /dev/null +++ b/mpeg4/src/build_avopt @@ -0,0 +1,9 @@ +#!/bin/sh +sed 's/unsigned//g' |\ + sed 's/enum//g' |\ + egrep '^ *(int|float|double|AVRational|char *\*) *[a-zA-Z_0-9]* *;' |\ + sed 's/^ *\([^ ]*\)[ *]*\([^;]*\);.*$/{"\2", NULL, OFFSET(\2), FF_OPT_TYPE_\U\1, DEFAULT, \1_MIN, \1_MAX},/' |\ + sed 's/AVRATIONAL_M/INT_M/g'|\ + sed 's/TYPE_AVRATIONAL/TYPE_RATIONAL/g'|\ + sed 's/FLOAT_M/FLT_M/g'|\ + sed 's/FF_OPT_TYPE_CHAR/FF_OPT_TYPE_STRING/g' diff --git a/mpeg4/src/clean-diff b/mpeg4/src/clean-diff new file mode 100755 index 0000000000000000000000000000000000000000..3a6d19eede5199e622e6bdbdd8378b69aa79cacd --- /dev/null +++ b/mpeg4/src/clean-diff @@ -0,0 +1,11 @@ +#!/bin/sh +sed '/^+[^+]/!s/ /TaBBaT/g' |\ + expand -t `seq -s , 9 8 200` |\ + sed 's/TaBBaT/ /g' |\ + sed '/^+[^+]/s/ * $//' |\ + tr -d '\015' |\ + tr '\n' '' |\ + sed 's/\(@@[^@]*@@[^@]*\)/\n\1/g' |\ + egrep -v '@@[^@]*@@(( [^]*)|([+-][[:space:]]*)|(-[[:space:]]*([^]*)\+[[:space:]]*\5))*$' |\ + tr -d '\n' |\ + tr '' '\n' \ No newline at end of file diff --git 
a/mpeg4/src/cmdutils.c b/mpeg4/src/cmdutils.c new file mode 100644 index 0000000000000000000000000000000000000000..32f380838d1c12c4c1331fc3246f4e29044b0ed8 --- /dev/null +++ b/mpeg4/src/cmdutils.c @@ -0,0 +1,135 @@ +/* + * Various utilities for command line tools + * Copyright (c) 2000-2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#define HAVE_AV_CONFIG_H +#include "avformat.h" +#include "common.h" + +#include "cmdutils.h" + +/* Print msg followed by one help line per matching option. An entry of the table options matches when (flags & mask) equals value; the table ends at the entry whose name is NULL. msg is printed only once, just before the first match, so nothing at all is printed when no entry matches. */ void show_help_options(const OptionDef *options, const char *msg, int mask, int value) +{ + const OptionDef *po; + int first; + + first = 1; + for(po = options; po->name != NULL; po++) { + /* buf receives the option name, plus a space and argname when HAS_ARG is set; pstrcpy and pstrcat are given sizeof(buf) as the size limit */ char buf[64]; + if ((po->flags & mask) == value) { + if (first) { + printf("%s", msg); + first = 0; + } + pstrcpy(buf, sizeof(buf), po->name); + if (po->flags & HAS_ARG) { + pstrcat(buf, sizeof(buf), " "); + pstrcat(buf, sizeof(buf), po->argname); + } + /* a dash, then buf left-justified in a 17-character field, then the help text */ printf("-%-17s %s\n", buf, po->help); + } + } +} + +/* Linear search of the table po for an entry whose name equals name (strcmp). Returns the matching entry, or the terminating entry (the one whose name is NULL) when there is no match, so callers must test the name field of the result rather than the pointer. */ static const OptionDef* find_option(const OptionDef *po, const char *name){ + while (po->name != NULL) { + if (!strcmp(name, po->name)) + break; + po++; + } + return po; +} + +/* Process argv[1] .. argv[argc-1] against the table options. An argument that starts with a dash and has at least one more character is looked up without the dash; when it is not in the table the entry named default is used instead, and when that is missing too an error is printed and the program exits with status 1. Every other argument is handed to parse_arg_file(). For a matched entry the flags select the action, tested in this order: OPT_STRING stores an av_strdup() copy of the argument, OPT_BOOL stores 1, OPT_INT stores atoi() of the argument, OPT_FLOAT stores atof() of the argument, OPT_FUNC2 calls func2_arg(option name, argument), anything else calls func_arg(argument). */ void parse_options(int argc, char **argv, const OptionDef *options) +{ + const char *opt, *arg; + int optindex; + const OptionDef *po; + + /* parse options */ +
optindex = 1; + while (optindex < argc) { + opt = argv[optindex++]; + + /* a lone dash does not count as an option and takes the else branch below */ if (opt[0] == '-' && opt[1] != '\0') { + /* look the name up without its leading dash */ po= find_option(options, opt + 1); + /* name not in the table: fall back to the entry named default, if the table has one */ if (!po->name) + po= find_option(options, "default"); + if (!po->name) { +/* reached by falling through here and by the goto in the OPT_FUNC2 branch further down */ unknown_opt: + fprintf(stderr, "%s: unrecognized option '%s'\n", argv[0], opt); + exit(1); + } + /* arg stays NULL for options without HAS_ARG */ arg = NULL; + if (po->flags & HAS_ARG) { + /* NOTE(review): when the option is the last argument this reads argv[argc]; the NULL test below relies on that slot being a null pointer, which holds for the argv passed to main() */ arg = argv[optindex++]; + if (!arg) { + fprintf(stderr, "%s: missing argument for option '%s'\n", argv[0], opt); + exit(1); + } + } + /* the first flag that matches in this chain decides how the value is stored or which callback runs */ if (po->flags & OPT_STRING) { + char *str; + str = av_strdup(arg); + *po->u.str_arg = str; + } else if (po->flags & OPT_BOOL) { + *po->u.int_arg = 1; + } else if (po->flags & OPT_INT) { + *po->u.int_arg = atoi(arg); + } else if (po->flags & OPT_FLOAT) { + *po->u.float_arg = atof(arg); + } else if (po->flags & OPT_FUNC2) { + /* a negative return from the callback is reported as an unrecognized option */ if(po->u.func2_arg(opt+1, arg)<0) + goto unknown_opt; + } else { + po->u.func_arg(arg); + } + } else { + /* not an option: hand the argument to parse_arg_file(), which is only declared in cmdutils.h and defined outside this file */ parse_arg_file(opt); + } + } +} + +/* Write a message for the AVERROR_* code err to stderr, prefixed with filename. Codes without a dedicated case get the generic message about opening the file. */ void print_error(const char *filename, int err) +{ + switch(err) { + case AVERROR_NUMEXPECTED: + fprintf(stderr, "%s: Incorrect image filename syntax.\n" + "Use '%%d' to specify the image number:\n" + " for img1.jpg, img2.jpg, ..., use 'img%%d.jpg';\n" + " for img001.jpg, img002.jpg, ..., use 'img%%03d.jpg'.\n", + filename); + break; + case AVERROR_INVALIDDATA: + fprintf(stderr, "%s: Error while parsing header\n", filename); + break; + case AVERROR_NOFMT: + fprintf(stderr, "%s: Unknown format\n", filename); + break; + case AVERROR_IO: + fprintf(stderr, "%s: I/O error occured\n" + "Usually that means that input file is truncated and/or corrupted.\n", + filename); + break; + case AVERROR_NOMEM: + fprintf(stderr, "%s: memory allocation error occured\n", filename); + break; + default: + fprintf(stderr, "%s: Error while opening file\n", filename); + break; + } +} diff --git a/mpeg4/src/cmdutils.h b/mpeg4/src/cmdutils.h new file mode 100644 index 
0000000000000000000000000000000000000000..d9c66f015e20a8d54f4cd0887bc96add196c8484 --- /dev/null +++ b/mpeg4/src/cmdutils.h @@ -0,0 +1,34 @@ +#ifndef _CMD_UTILS_H +#define _CMD_UTILS_H /* NOTE(review): identifiers that start with an underscore followed by a capital letter are reserved for the implementation; consider renaming this guard */ + +/* Description of one command-line option. parse_options() and show_help_options() walk arrays of these until they reach an entry whose name is NULL. */ typedef struct { + /* option name, compared against the argument text after its leading dash */ const char *name; + /* bitwise OR of the flag macros defined below */ int flags; +#define HAS_ARG 0x0001 +#define OPT_BOOL 0x0002 +#define OPT_EXPERT 0x0004 +#define OPT_STRING 0x0008 +#define OPT_VIDEO 0x0010 +#define OPT_AUDIO 0x0020 +#define OPT_GRAB 0x0040 +#define OPT_INT 0x0080 +#define OPT_FLOAT 0x0100 +#define OPT_SUBTITLE 0x0200 +#define OPT_FUNC2 0x0400 + /* destination of the parsed value or callback to run; parse_options() picks the member from flags */ union { + void (*func_arg)(const char *); //FIXME passing error code as int return would be nicer than exit() in the func + int *int_arg; + char **str_arg; + float *float_arg; + int (*func2_arg)(const char *, const char *); + } u; + /* help text printed by show_help_options() */ const char *help; + /* argument placeholder printed after the name in help output when HAS_ARG is set */ const char *argname; +} OptionDef; + +/* Print msg and one help line for each option whose (flags & mask) equals value. */ void show_help_options(const OptionDef *options, const char *msg, int mask, int value); +/* Process argv[1] .. argv[argc-1] against the option table options. */ void parse_options(int argc, char **argv, const OptionDef *options); +/* Called by parse_options() for every argument that is not an option. No definition is visible in cmdutils.c; presumably each tool supplies its own -- TODO confirm. */ void parse_arg_file(const char *filename); +/* Report the AVERROR_* code err for filename on stderr. */ void print_error(const char *filename, int err); + +#endif /* _CMD_UTILS_H */ diff --git a/mpeg4/src/common.mak b/mpeg4/src/common.mak new file mode 100644 index 0000000000000000000000000000000000000000..3bcf1e96e66d030521536254a14f4b8af7f33065 --- /dev/null +++ b/mpeg4/src/common.mak @@ -0,0 +1,101 @@ +# +# common bits used by all libraries +# + +SRC_DIR = $(SRC_PATH)/$(SUBDIR) +VPATH = $(SRC_DIR) + +#FIXME: This should be in configure/config.mak +ifeq ($(CONFIG_WIN32),yes) +LDFLAGS = -Wl,--output-def,$(@:.dll=.def),--out-implib,lib$(SLIBNAME:$(SLIBSUF)=.dll.a) +endif + +ifeq ($(TARGET_GPROF),yes) +CFLAGS+=-p +LDFLAGS+=-p +endif + +ifeq ($(TARGET_ARCH_SPARC64),yes) +CFLAGS+= -mcpu=ultrasparc -mtune=ultrasparc +endif + +SRCS := $(OBJS:.o=.c) $(ASM_OBJS:.o=.S) $(CPPOBJS:.o=.cpp) +OBJS := $(OBJS) $(ASM_OBJS) $(CPPOBJS) +STATIC_OBJS := $(OBJS) $(STATIC_OBJS) +SHARED_OBJS := $(OBJS) $(SHARED_OBJS) + +all: $(LIB) $(SLIBNAME) + +$(LIB): $(STATIC_OBJS) + rm -f $@ + $(AR) 
rc $@ $^ $(EXTRAOBJS) + $(RANLIB) $@ + +$(SLIBNAME): $(SHARED_OBJS) + $(CC) $(SHFLAGS) $(LDFLAGS) -o $@ $^ $(EXTRALIBS) $(EXTRAOBJS) +ifeq ($(CONFIG_WIN32),yes) + -lib /machine:i386 /def:$(@:.dll=.def) +endif + +%.o: %.c + $(CC) $(CFLAGS) $(LIBOBJFLAGS) -c -o $@ $< + +%.o: %.S + $(CC) $(CFLAGS) $(LIBOBJFLAGS) -c -o $@ $< + +# BeOS: remove -Wall to get rid of all the "multibyte constant" warnings +%.o: %.cpp + g++ $(subst -Wall,,$(CFLAGS)) -c -o $@ $< + +depend: $(SRCS) + $(CC) -MM $(CFLAGS) $^ 1>.depend + +dep: depend + +clean:: + rm -f *.o *.d *~ *.a *.lib *.so *.dylib *.dll \ + *.lib *.def *.dll.a *.exp + +distclean: clean + rm -f .depend + +ifeq ($(BUILD_SHARED),yes) +INSTLIBTARGETS += install-lib-shared +endif +ifeq ($(BUILD_STATIC),yes) +INSTLIBTARGETS += install-lib-static +endif + +install: install-libs install-headers + +install-libs: $(INSTLIBTARGETS) + +install-lib-shared: $(SLIBNAME) + install -d "$(libdir)" +ifeq ($(CONFIG_WIN32),yes) + install $(INSTALLSTRIP) -m 755 $(SLIBNAME) "$(prefix)" +else + install $(INSTALLSTRIP) -m 755 $(SLIBNAME) \ + $(libdir)/$(SLIBNAME_WITH_VERSION) + ln -sf $(SLIBNAME_WITH_VERSION) \ + $(libdir)/$(SLIBNAME_WITH_MAJOR) + ln -sf $(SLIBNAME_WITH_VERSION) \ + $(libdir)/$(SLIBNAME) +endif + +install-lib-static: $(LIB) + install -d "$(libdir)" + install -m 644 $(LIB) "$(libdir)" + +install-headers: + install -d "$(incdir)" + install -d "$(libdir)/pkgconfig" + install -m 644 $(addprefix "$(SRC_DIR)"/,$(HEADERS)) "$(incdir)" + install -m 644 $(BUILD_ROOT)/lib$(NAME).pc "$(libdir)/pkgconfig" + +# +# include dependency files if they exist +# +ifneq ($(wildcard .depend),) +include .depend +endif diff --git a/mpeg4/src/config.h b/mpeg4/src/config.h new file mode 100644 index 0000000000000000000000000000000000000000..2c5be02d6afbfe6ec1a99d03aba51c7bfda79562 --- /dev/null +++ b/mpeg4/src/config.h @@ -0,0 +1,190 @@ +/* Automatically generated by configure - do not modify! 
*/ +#define FFMPEG_CONFIGURATION " --disable-mmx --disable-ffserver --disable-ffplay --disable-strip " +//#define ARCH_X86_64 1 -- removed +#define TUNECPU generic +#define HAVE_BUILTIN_VECTOR 1 +#define HAVE_LOCALTIME_R 1 +#define HAVE_LRINTF 1 +#define HAVE_VHOOK 1 +#define CONFIG_ENCODERS 1 +#define CONFIG_DECODERS 1 +#define CONFIG_MUXERS 1 +#define CONFIG_DEMUXERS 1 +#define CONFIG_MPEGAUDIO_HP 1 +#define CONFIG_VIDEO4LINUX 1 +#define CONFIG_VIDEO4LINUX2 1 +#define CONFIG_DV1394 1 +#define CONFIG_HAVE_DLOPEN 1 +#define CONFIG_HAVE_DLFCN 1 +#define CONFIG_AUDIO_OSS 1 +#define CONFIG_NETWORK 1 +#define CONFIG_IPV6 1 +#define CONFIG_ZLIB 1 +//#define HAVE_MALLOC_H 1 -- defined in Android headers +#define HAVE_MEMALIGN 1 +#define SIMPLE_IDCT 1 +#define CONFIG_PROTOCOLS 1 +#define restrict __restrict__ +#define CONFIG_AC3_ENCODER 1 +#define CONFIG_MP2_ENCODER 1 +#define CONFIG_MP3LAME_ENCODER 1 +#define CONFIG_OGGVORBIS_ENCODER 1 +#define CONFIG_OGGVORBIS_DECODER 1 +#define CONFIG_OGGTHEORA_ENCODER 1 +#define CONFIG_OGGTHEORA_DECODER 1 +#define CONFIG_FAAC_ENCODER 1 +#define CONFIG_XVID_ENCODER 1 +#define CONFIG_MPEG1VIDEO_ENCODER 1 +#define CONFIG_H264_ENCODER 1 +#define CONFIG_MPEG2VIDEO_ENCODER 1 +#define CONFIG_H261_ENCODER 1 +#define CONFIG_H263_ENCODER 1 +#define CONFIG_H263P_ENCODER 1 +#define CONFIG_FLV_ENCODER 1 +#define CONFIG_RV10_ENCODER 1 +#define CONFIG_RV20_ENCODER 1 +#define CONFIG_MPEG4_ENCODER 1 +#define CONFIG_MSMPEG4V1_ENCODER 1 +#define CONFIG_MSMPEG4V2_ENCODER 1 +#define CONFIG_MSMPEG4V3_ENCODER 1 +#define CONFIG_WMV1_ENCODER 1 +#define CONFIG_WMV2_ENCODER 1 +#define CONFIG_SVQ1_ENCODER 1 +#define CONFIG_MJPEG_ENCODER 1 +#define CONFIG_LJPEG_ENCODER 1 +#define CONFIG_JPEGLS_ENCODER 1 +#define CONFIG_PNG_ENCODER 1 +#define CONFIG_PPM_ENCODER 1 +#define CONFIG_PGM_ENCODER 1 +#define CONFIG_PGMYUV_ENCODER 1 +#define CONFIG_PBM_ENCODER 1 +#define CONFIG_PAM_ENCODER 1 +#define CONFIG_HUFFYUV_ENCODER 1 +#define CONFIG_FFVHUFF_ENCODER 1 +#define 
CONFIG_ASV1_ENCODER 1 +#define CONFIG_ASV2_ENCODER 1 +#define CONFIG_FFV1_ENCODER 1 +#define CONFIG_SNOW_ENCODER 1 +#define CONFIG_ZLIB_ENCODER 1 +#define CONFIG_DVVIDEO_ENCODER 1 +#define CONFIG_SONIC_ENCODER 1 +#define CONFIG_SONIC_LS_ENCODER 1 +#define CONFIG_X264_ENCODER 1 +#define CONFIG_LIBGSM_ENCODER 1 +#define CONFIG_RAWVIDEO_ENCODER 1 +#define CONFIG_H263_DECODER 1 +#define CONFIG_H261_DECODER 1 +#define CONFIG_MPEG4_DECODER 1 +#define CONFIG_MSMPEG4V1_DECODER 1 +#define CONFIG_MSMPEG4V2_DECODER 1 +#define CONFIG_MSMPEG4V3_DECODER 1 +#define CONFIG_WMV1_DECODER 1 +#define CONFIG_WMV2_DECODER 1 +#define CONFIG_VC9_DECODER 1 +#define CONFIG_WMV3_DECODER 1 +#define CONFIG_H263I_DECODER 1 +#define CONFIG_FLV_DECODER 1 +#define CONFIG_RV10_DECODER 1 +#define CONFIG_RV20_DECODER 1 +#define CONFIG_SVQ1_DECODER 1 +#define CONFIG_SVQ3_DECODER 1 +#define CONFIG_WMAV1_DECODER 1 +#define CONFIG_WMAV2_DECODER 1 +#define CONFIG_INDEO2_DECODER 1 +#define CONFIG_INDEO3_DECODER 1 +#define CONFIG_TSCC_DECODER 1 +#define CONFIG_CSCD_DECODER 1 +#define CONFIG_NUV_DECODER 1 +#define CONFIG_ULTI_DECODER 1 +#define CONFIG_QDRAW_DECODER 1 +#define CONFIG_XL_DECODER 1 +#define CONFIG_QPEG_DECODER 1 +#define CONFIG_LOCO_DECODER 1 +#define CONFIG_KMVC_DECODER 1 +#define CONFIG_WNV1_DECODER 1 +#define CONFIG_AASC_DECODER 1 +#define CONFIG_FRAPS_DECODER 1 +#define CONFIG_AAC_DECODER 1 +#define CONFIG_MPEG4AAC_DECODER 1 +#define CONFIG_MPEG1VIDEO_DECODER 1 +#define CONFIG_MPEG2VIDEO_DECODER 1 +#define CONFIG_MPEGVIDEO_DECODER 1 +#define CONFIG_MPEG_XVMC_DECODER 1 +#define CONFIG_DVVIDEO_DECODER 1 +#define CONFIG_MJPEG_DECODER 1 +#define CONFIG_MJPEGB_DECODER 1 +#define CONFIG_SP5X_DECODER 1 +#define CONFIG_PNG_DECODER 1 +#define CONFIG_MP2_DECODER 1 +#define CONFIG_MP3_DECODER 1 +#define CONFIG_MP3ADU_DECODER 1 +#define CONFIG_MP3ON4_DECODER 1 +#define CONFIG_MACE3_DECODER 1 +#define CONFIG_MACE6_DECODER 1 +#define CONFIG_HUFFYUV_DECODER 1 +#define CONFIG_FFVHUFF_DECODER 1 +#define 
CONFIG_FFV1_DECODER 1 +#define CONFIG_SNOW_DECODER 1 +#define CONFIG_CYUV_DECODER 1 +#define CONFIG_H264_DECODER 1 +#define CONFIG_VP3_DECODER 1 +#define CONFIG_THEORA_DECODER 1 +#define CONFIG_ASV1_DECODER 1 +#define CONFIG_ASV2_DECODER 1 +#define CONFIG_VCR1_DECODER 1 +#define CONFIG_CLJR_DECODER 1 +#define CONFIG_FOURXM_DECODER 1 +#define CONFIG_MDEC_DECODER 1 +#define CONFIG_ROQ_DECODER 1 +#define CONFIG_INTERPLAY_VIDEO_DECODER 1 +#define CONFIG_XAN_WC3_DECODER 1 +#define CONFIG_RPZA_DECODER 1 +#define CONFIG_CINEPAK_DECODER 1 +#define CONFIG_MSRLE_DECODER 1 +#define CONFIG_MSVIDEO1_DECODER 1 +#define CONFIG_VQA_DECODER 1 +#define CONFIG_IDCIN_DECODER 1 +#define CONFIG_EIGHTBPS_DECODER 1 +#define CONFIG_SMC_DECODER 1 +#define CONFIG_FLIC_DECODER 1 +#define CONFIG_TRUEMOTION1_DECODER 1 +#define CONFIG_TRUEMOTION2_DECODER 1 +#define CONFIG_VMDVIDEO_DECODER 1 +#define CONFIG_VMDAUDIO_DECODER 1 +#define CONFIG_MSZH_DECODER 1 +#define CONFIG_ZLIB_DECODER 1 +#define CONFIG_ZMBV_DECODER 1 +#define CONFIG_SMACKER_DECODER 1 +#define CONFIG_SMACKAUD_DECODER 1 +#define CONFIG_SONIC_DECODER 1 +#define CONFIG_AC3_DECODER 1 +#define CONFIG_DTS_DECODER 1 +#define CONFIG_RA_144_DECODER 1 +#define CONFIG_RA_288_DECODER 1 +#define CONFIG_ROQ_DPCM_DECODER 1 +#define CONFIG_INTERPLAY_DPCM_DECODER 1 +#define CONFIG_XAN_DPCM_DECODER 1 +#define CONFIG_SOL_DPCM_DECODER 1 +#define CONFIG_QTRLE_DECODER 1 +#define CONFIG_FLAC_DECODER 1 +#define CONFIG_SHORTEN_DECODER 1 +#define CONFIG_ALAC_DECODER 1 +#define CONFIG_WS_SND1_DECODER 1 +#define CONFIG_VORBIS_DECODER 1 +#define CONFIG_LIBGSM_DECODER 1 +#define CONFIG_QDM2_DECODER 1 +#define CONFIG_COOK_DECODER 1 +#define CONFIG_TRUESPEECH_DECODER 1 +#define CONFIG_TTA_DECODER 1 +#define CONFIG_AVS_DECODER 1 +#define CONFIG_RAWVIDEO_DECODER 1 +#define CONFIG_AMR_NB_DECODER 1 +#define CONFIG_AMR_NB_ENCODER 1 +#define CONFIG_AMR_WB_DECODER 1 +#define CONFIG_AMR_WB_ENCODER 1 +#define CONFIG_BMP_DECODER 1 +#define CONFIG_MMVIDEO_DECODER 1 
+#define CONFIG_DVDSUB_DECODER 1 +#define CONFIG_DVDSUB_ENCODER 1 +#define CONFIG_DVBSUB_DECODER 1 +#define CONFIG_DVBSUB_ENCODER 1 diff --git a/mpeg4/src/config.mak b/mpeg4/src/config.mak new file mode 100644 index 0000000000000000000000000000000000000000..c4d672f279eed3da716325f69ae19ea28f755bcf --- /dev/null +++ b/mpeg4/src/config.mak @@ -0,0 +1,213 @@ +# Automatically generated by configure - do not modify! +prefix=$(DESTDIR)/usr/local +libdir=$(DESTDIR)/usr/local/lib +incdir=$(DESTDIR)/usr/local/include/ffmpeg +bindir=$(DESTDIR)/usr/local/bin +mandir=$(DESTDIR)/usr/local/man +MAKE=make +CC=gcc +AR=ar +RANLIB=ranlib +STRIP=echo ignoring strip +INSTALLSTRIP= +OPTFLAGS=-O3 -g -Wall -Wno-switch -Wdeclaration-after-statement +SHCFLAGS=-O3 -g -Wall -Wno-switch -Wdeclaration-after-statement +LDFLAGS=-Wl,--warn-common -rdynamic +LDCONFIG=ldconfig +FFSLDFLAGS=-Wl,-E +SHFLAGS=-shared -Wl,-soname,$@.$(LIBMAJOR) +LIBOBJFLAGS= +BUILD_STATIC=yes +BUILDSUF= +LIBPREF=lib +LIBSUF=${BUILDSUF}.a +LIB=$(LIBPREF)$(NAME)$(LIBSUF) +SLIBPREF=lib +SLIBSUF=${BUILDSUF}.so +EXESUF=${BUILDSUF} +TARGET_OS=Linux +TARGET_ARCH_X86_64=yes +TARGET_BUILTIN_VECTOR=yes +HAVE_FREETYPE2=yes +CONFIG_SDL=yes +SDL_LIBS=-L/usr/lib -lSDL +SDL_CFLAGS=-I/usr/include/SDL -D_GNU_SOURCE=1 -D_REENTRANT +BUILD_VHOOK=yes +EXTRALIBS=-lm -lz -ldl +VERSION=CVS +CONFIG_ENCODERS=yes +CONFIG_DECODERS=yes +CONFIG_MUXERS=yes +CONFIG_DEMUXERS=yes +CONFIG_VIDEO4LINUX=yes +CONFIG_VIDEO4LINUX2=yes +CONFIG_DV1394=yes +CONFIG_AUDIO_OSS=yes +CONFIG_NETWORK=yes +CONFIG_ZLIB=yes +CONFIG_PROTOCOLS=yes +SRC_PATH=/usr/local/google/home/projects/bench/outofbox/mpeg/ffmpeg/ffmpeg +BUILD_ROOT=/usr/local/google/home/projects/bench/outofbox/mpeg/ffmpeg/obj +CONFIG_AC3_ENCODER=yes +CONFIG_MP2_ENCODER=yes +CONFIG_MP3LAME_ENCODER=yes +CONFIG_OGGVORBIS_ENCODER=yes +CONFIG_OGGVORBIS_DECODER=yes +CONFIG_OGGTHEORA_ENCODER=yes +CONFIG_OGGTHEORA_DECODER=yes +CONFIG_FAAC_ENCODER=yes +CONFIG_XVID_ENCODER=yes +CONFIG_MPEG1VIDEO_ENCODER=yes 
+CONFIG_H264_ENCODER=yes +CONFIG_MPEG2VIDEO_ENCODER=yes +CONFIG_H261_ENCODER=yes +CONFIG_H263_ENCODER=yes +CONFIG_H263P_ENCODER=yes +CONFIG_FLV_ENCODER=yes +CONFIG_RV10_ENCODER=yes +CONFIG_RV20_ENCODER=yes +CONFIG_MPEG4_ENCODER=yes +CONFIG_MSMPEG4V1_ENCODER=yes +CONFIG_MSMPEG4V2_ENCODER=yes +CONFIG_MSMPEG4V3_ENCODER=yes +CONFIG_WMV1_ENCODER=yes +CONFIG_WMV2_ENCODER=yes +CONFIG_SVQ1_ENCODER=yes +CONFIG_MJPEG_ENCODER=yes +CONFIG_LJPEG_ENCODER=yes +CONFIG_JPEGLS_ENCODER=yes +CONFIG_PNG_ENCODER=yes +CONFIG_PPM_ENCODER=yes +CONFIG_PGM_ENCODER=yes +CONFIG_PGMYUV_ENCODER=yes +CONFIG_PBM_ENCODER=yes +CONFIG_PAM_ENCODER=yes +CONFIG_HUFFYUV_ENCODER=yes +CONFIG_FFVHUFF_ENCODER=yes +CONFIG_ASV1_ENCODER=yes +CONFIG_ASV2_ENCODER=yes +CONFIG_FFV1_ENCODER=yes +CONFIG_SNOW_ENCODER=yes +CONFIG_ZLIB_ENCODER=yes +CONFIG_DVVIDEO_ENCODER=yes +CONFIG_SONIC_ENCODER=yes +CONFIG_SONIC_LS_ENCODER=yes +CONFIG_X264_ENCODER=yes +CONFIG_LIBGSM_ENCODER=yes +CONFIG_RAWVIDEO_ENCODER=yes +CONFIG_H263_DECODER=yes +CONFIG_H261_DECODER=yes +CONFIG_MPEG4_DECODER=yes +CONFIG_MSMPEG4V1_DECODER=yes +CONFIG_MSMPEG4V2_DECODER=yes +CONFIG_MSMPEG4V3_DECODER=yes +CONFIG_WMV1_DECODER=yes +CONFIG_WMV2_DECODER=yes +CONFIG_VC9_DECODER=yes +CONFIG_WMV3_DECODER=yes +CONFIG_H263I_DECODER=yes +CONFIG_FLV_DECODER=yes +CONFIG_RV10_DECODER=yes +CONFIG_RV20_DECODER=yes +CONFIG_SVQ1_DECODER=yes +CONFIG_SVQ3_DECODER=yes +CONFIG_WMAV1_DECODER=yes +CONFIG_WMAV2_DECODER=yes +CONFIG_INDEO2_DECODER=yes +CONFIG_INDEO3_DECODER=yes +CONFIG_TSCC_DECODER=yes +CONFIG_CSCD_DECODER=yes +CONFIG_NUV_DECODER=yes +CONFIG_ULTI_DECODER=yes +CONFIG_QDRAW_DECODER=yes +CONFIG_XL_DECODER=yes +CONFIG_QPEG_DECODER=yes +CONFIG_LOCO_DECODER=yes +CONFIG_KMVC_DECODER=yes +CONFIG_WNV1_DECODER=yes +CONFIG_AASC_DECODER=yes +CONFIG_FRAPS_DECODER=yes +CONFIG_AAC_DECODER=yes +CONFIG_MPEG4AAC_DECODER=yes +CONFIG_MPEG1VIDEO_DECODER=yes +CONFIG_MPEG2VIDEO_DECODER=yes +CONFIG_MPEGVIDEO_DECODER=yes +CONFIG_MPEG_XVMC_DECODER=yes +CONFIG_DVVIDEO_DECODER=yes 
+CONFIG_MJPEG_DECODER=yes +CONFIG_MJPEGB_DECODER=yes +CONFIG_SP5X_DECODER=yes +CONFIG_PNG_DECODER=yes +CONFIG_MP2_DECODER=yes +CONFIG_MP3_DECODER=yes +CONFIG_MP3ADU_DECODER=yes +CONFIG_MP3ON4_DECODER=yes +CONFIG_MACE3_DECODER=yes +CONFIG_MACE6_DECODER=yes +CONFIG_HUFFYUV_DECODER=yes +CONFIG_FFVHUFF_DECODER=yes +CONFIG_FFV1_DECODER=yes +CONFIG_SNOW_DECODER=yes +CONFIG_CYUV_DECODER=yes +CONFIG_H264_DECODER=yes +CONFIG_VP3_DECODER=yes +CONFIG_THEORA_DECODER=yes +CONFIG_ASV1_DECODER=yes +CONFIG_ASV2_DECODER=yes +CONFIG_VCR1_DECODER=yes +CONFIG_CLJR_DECODER=yes +CONFIG_FOURXM_DECODER=yes +CONFIG_MDEC_DECODER=yes +CONFIG_ROQ_DECODER=yes +CONFIG_INTERPLAY_VIDEO_DECODER=yes +CONFIG_XAN_WC3_DECODER=yes +CONFIG_RPZA_DECODER=yes +CONFIG_CINEPAK_DECODER=yes +CONFIG_MSRLE_DECODER=yes +CONFIG_MSVIDEO1_DECODER=yes +CONFIG_VQA_DECODER=yes +CONFIG_IDCIN_DECODER=yes +CONFIG_EIGHTBPS_DECODER=yes +CONFIG_SMC_DECODER=yes +CONFIG_FLIC_DECODER=yes +CONFIG_TRUEMOTION1_DECODER=yes +CONFIG_TRUEMOTION2_DECODER=yes +CONFIG_VMDVIDEO_DECODER=yes +CONFIG_VMDAUDIO_DECODER=yes +CONFIG_MSZH_DECODER=yes +CONFIG_ZLIB_DECODER=yes +CONFIG_ZMBV_DECODER=yes +CONFIG_SMACKER_DECODER=yes +CONFIG_SMACKAUD_DECODER=yes +CONFIG_SONIC_DECODER=yes +CONFIG_AC3_DECODER=yes +CONFIG_DTS_DECODER=yes +CONFIG_RA_144_DECODER=yes +CONFIG_RA_288_DECODER=yes +CONFIG_ROQ_DPCM_DECODER=yes +CONFIG_INTERPLAY_DPCM_DECODER=yes +CONFIG_XAN_DPCM_DECODER=yes +CONFIG_SOL_DPCM_DECODER=yes +CONFIG_QTRLE_DECODER=yes +CONFIG_FLAC_DECODER=yes +CONFIG_SHORTEN_DECODER=yes +CONFIG_ALAC_DECODER=yes +CONFIG_WS_SND1_DECODER=yes +CONFIG_VORBIS_DECODER=yes +CONFIG_LIBGSM_DECODER=yes +CONFIG_QDM2_DECODER=yes +CONFIG_COOK_DECODER=yes +CONFIG_TRUESPEECH_DECODER=yes +CONFIG_TTA_DECODER=yes +CONFIG_AVS_DECODER=yes +CONFIG_RAWVIDEO_DECODER=yes +CONFIG_AMR_NB_DECODER=yes +CONFIG_AMR_NB_ENCODER=yes +CONFIG_AMR_WB_DECODER=yes +CONFIG_AMR_WB_ENCODER=yes +CONFIG_BMP_DECODER=yes +CONFIG_MMVIDEO_DECODER=yes +CONFIG_DVDSUB_DECODER=yes 
+CONFIG_DVDSUB_ENCODER=yes +CONFIG_DVBSUB_DECODER=yes +CONFIG_DVBSUB_ENCODER=yes diff --git a/mpeg4/src/configure b/mpeg4/src/configure new file mode 100755 index 0000000000000000000000000000000000000000..20b6e4bbd990fbdec24673ebc54652d55db5227f --- /dev/null +++ b/mpeg4/src/configure @@ -0,0 +1,2097 @@ +#!/bin/sh +# +# ffmpeg configure script (c) 2000, 2001, 2002 Fabrice Bellard +# + +if test x"$1" = x"-h" -o x"$1" = x"--help" ; then +cat << EOF + +Usage: configure [options] +Options: [defaults in brackets after descriptions] + +EOF +echo "Standard options:" +echo " --help print this message" +echo " --prefix=PREFIX install in PREFIX [$prefix]" +echo " --libdir=DIR install libs in DIR [PREFIX/lib]" +echo " --incdir=DIR install includes in DIR [PREFIX/include/ffmpeg]" +echo " --mandir=DIR install man page in DIR [PREFIX/man]" +echo " --enable-mp3lame enable MP3 encoding via libmp3lame [default=no]" +echo " --enable-libogg enable Ogg support via libogg [default=no]" +echo " --enable-vorbis enable Vorbis support via libvorbis [default=no]" +echo " --enable-theora enable Theora support via libtheora [default=no]" +echo " --enable-faad enable FAAD support via libfaad [default=no]" +echo " --enable-faadbin build FAAD support with runtime linking [default=no]" +echo " --enable-faac enable FAAC support via libfaac [default=no]" +echo " --enable-libgsm enable GSM support via libgsm [default=no]" +echo " --enable-xvid enable XviD support via xvidcore [default=no]" +echo " --enable-x264 enable H.264 encoding via x264 [default=no]" +echo " --enable-mingw32 enable MinGW native/cross Windows compile" +echo " --enable-mingwce enable MinGW native/cross WinCE compile" +echo " --enable-a52 enable GPLed A52 support [default=no]" +echo " --enable-a52bin open liba52.so.0 at runtime [default=no]" +echo " --enable-dts enable GPLed DTS support [default=no]" +echo " --enable-pp enable GPLed postprocessing support [default=no]" +echo " --enable-static build static libraries [default=yes]" 
+echo " --disable-static do not build static libraries [default=no]" +echo " --enable-shared build shared libraries [default=no]" +echo " --disable-shared do not build shared libraries [default=yes]" +echo " --enable-amr_nb enable amr_nb float audio codec" +echo " --enable-amr_nb-fixed use fixed point for amr-nb codec" +echo " --enable-amr_wb enable amr_wb float audio codec" +echo " --enable-amr_if2 enable amr_wb IF2 audio codec" +echo " --enable-sunmlib use Sun medialib [default=no]" +echo " --enable-pthreads use pthreads [default=no]" +echo " --enable-dc1394 enable IIDC-1394 grabbing using libdc1394" +echo " and libraw1394 [default=no]" +echo " --enable-gpl allow use of GPL code, the resulting libav*" +echo " and ffmpeg will be under GPL [default=no]" +echo "" +echo "Advanced options (experts only):" +echo " --source-path=PATH path to source code [$source_path]" +echo " --cross-prefix=PREFIX use PREFIX for compilation tools [$cross_prefix]" +echo " --cc=CC use C compiler CC [$cc]" +echo " --make=MAKE use specified make [$make]" +echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]" +echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]" +echo " --extra-libs=ELIBS add ELIBS [$ELIBS]" +echo " --build-suffix=SUFFIX suffix for application specific build []" +echo " --cpu=CPU force cpu to CPU [$cpu]" +echo " --tune=CPU tune code for a particular CPU" +echo " (may fail or perform badly on other CPUs)" +echo " --powerpc-perf-enable enable performance report on PPC" +echo " (requires enabling PMC)" +echo " --disable-mmx disable MMX usage" +echo " --disable-iwmmxt disable iwmmxt usage" +echo " --disable-altivec disable AltiVec usage" +echo " --disable-audio-oss disable OSS audio support [default=no]" +echo " --disable-audio-beos disable BeOS audio support [default=no]" +echo " --disable-v4l disable video4linux grabbing [default=no]" +echo " --disable-v4l2 disable video4linux2 grabbing [default=no]" +echo " --disable-bktr disable bktr video grabbing 
[default=no]" +echo " --disable-dv1394 disable DV1394 grabbing [default=no]" +echo " --disable-network disable network support [default=no]" +echo " --disable-zlib disable zlib [default=no]" +echo " --disable-lzo disable lzo [default=no]" +echo " --disable-simple_idct disable simple IDCT routines [default=no]" +echo " --disable-vhook disable video hooking support" +echo " --enable-gprof enable profiling with gprof [$gprof]" +echo " --disable-debug disable debugging symbols" +echo " --disable-opts disable compiler optimizations" +echo " --disable-mpegaudio-hp faster (but less accurate)" +echo " MPEG audio decoding [default=no]" +echo " --disable-protocols disable I/O protocols support [default=no]" +echo " --disable-ffserver disable ffserver build" +echo " --disable-ffplay disable ffplay build" +echo " --enable-small optimize for size instead of speed" +echo " --enable-memalign-hack emulate memalign, interferes with memory debuggers" +echo " --disable-strip disable stripping of executables and shared libraries" +echo " --disable-encoder=NAME disables encoder NAME" +echo " --enable-encoder=NAME enables encoder NAME" +echo " --disable-decoder=NAME disables decoder NAME" +echo " --enable-decoder=NAME enables decoder NAME" +echo " --disable-encoders disables all encoders" +echo " --disable-decoders disables all decoders" +echo " --disable-muxers disables all muxers" +echo " --disable-demuxers disables all demuxers" +echo "" +echo "NOTE: Object files are built at the place where configure is launched." +exit 1 +fi + +# set temporary file name +if test ! -z "$TMPDIR" ; then + TMPDIR1="${TMPDIR}" +elif test ! 
-z "$TEMPDIR" ; then + TMPDIR1="${TEMPDIR}" +else + TMPDIR1="/tmp" +fi + +TMPC="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.c" +TMPO="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.o" +TMPE="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}" +TMPS="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.S" +TMPH="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.h" + +# default parameters +prefix="/usr/local" +libdir="" +incdir="" +mandir="" +bindir="" +cross_prefix="" +cc="gcc" +ar="ar" +ranlib="ranlib" +make="make" +strip="strip" +cpu=`uname -m` +tune="generic" +powerpc_perf="no" +mmx="default" +iwmmxt="default" +altivec="default" +mmi="default" +case "$cpu" in + i386|i486|i586|i686|i86pc|BePC) + cpu="x86" + ;; + x86_64|amd64) + cpu="x86" + canon_arch="`cc -dumpmachine | sed -e 's,\([^-]*\)-.*,\1,'`" + if [ x"$canon_arch" = x"x86_64" -o x"$canon_arch" = x"amd64" ]; then + if [ -z "`echo $CFLAGS | grep -- -m32`" ]; then + cpu="x86_64" + fi + fi + ;; + # armv4l is a subset of armv5tel + armv4l|armv5tel) + cpu="armv4l" + ;; + alpha) + cpu="alpha" + ;; + "Power Macintosh"|ppc|powerpc) + cpu="powerpc" + ;; + mips|mipsel) + cpu="mips" + ;; + sun4u|sparc64) + cpu="sparc64" + ;; + sparc) + cpu="sparc" + ;; + sh4) + cpu="sh4" + ;; + parisc|parisc64) + cpu="parisc" + ;; + s390|s390x) + cpu="s390" + ;; + m68k) + cpu="m68k" + ;; + ia64) + cpu="ia64" + ;; + *) + cpu="unknown" + ;; +esac +gprof="no" +v4l="yes" +v4l2="yes" +bktr="no" +audio_oss="yes" +audio_beos="no" +dv1394="yes" +dc1394="no" +network="yes" +zlib="yes" +lzo="yes" +libgsm="no" +mp3lame="no" +libogg="no" +vorbis="no" +theora="no" +faad="no" +faadbin="no" +faac="no" +xvid="no" +x264="no" +a52="no" +a52bin="no" +dts="no" +pp="no" +mingw32="no" +mingwce="no" +cygwin="no" +os2="no" +lstatic="yes" +lshared="no" +optimize="yes" +debug="yes" +dostrip="yes" +installstrip="-s" +extralibs="-lm" +simpleidct="yes" +bigendian="no" +inttypes="yes" +emu_fast_int="no" +vhook="default" +dlfcn="no" +dlopen="no" +mpegaudio_hp="yes" 
+SHFLAGS='-shared -Wl,-soname,$@.$(LIBMAJOR)' +netserver="no" +need_inet_aton="no" +protocols="yes" +ffserver="yes" +ffplay="yes" +LIBOBJFLAGS="" +LDFLAGS=-Wl,--warn-common +FFSLDFLAGS=-Wl,-E +LDCONFIG="ldconfig" +LIBPREF="lib" +LIBSUF=".a" +LIB='$(LIBPREF)$(NAME)$(LIBSUF)' +SLIBPREF="lib" +SLIBSUF=".so" +SLIBNAME='$(SLIBPREF)$(NAME)$(SLIBSUF)' +SLIBNAME_WITH_VERSION='$(SLIBPREF)$(NAME)$(SLIBSUF).$(LIBVERSION)' +SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(NAME)$(SLIBSUF).$(LIBMAJOR)' +EXESUF="" +BUILDSUF="" +amr_nb="no" +amr_wb="no" +amr_nb_fixed="no" +amr_if2="no" +sunmlib="no" +pthreads="no" +gpl="no" +memalignhack="no" +muxers="yes" +demuxers="yes" + +# OS specific +targetos=`uname -s` +case $targetos in +BeOS) +prefix="/boot/home/config" +# helps building libavcodec +CFLAGS="$CFLAGS -DPIC -fomit-frame-pointer" +# 3 gcc releases known for BeOS, each with ugly bugs +gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`" +case "$gcc_version" in +2.9-beos-991026*|2.9-beos-000224*) echo "R5/GG gcc" +mmx="no" +;; +*20010315*) echo "BeBits gcc" +CFLAGS="$CFLAGS -fno-expensive-optimizations" +;; +esac +SHFLAGS=-nostart +# disable Linux things +audio_oss="no" +v4l="no" +v4l2="no" +dv1394="no" +# enable BeOS things +audio_beos="yes" +# no need for libm, but the inet stuff +# Check for BONE +if (echo $BEINCLUDES|grep 'headers/be/bone' >/dev/null); then +extralibs="-lbind -lsocket" +else +netserver="yes" +need_inet_aton="yes" +extralibs="-lnet" +fi ;; +SunOS) +v4l="no" +v4l2="no" +audio_oss="no" +dv1394="no" +make="gmake" +LDFLAGS="" +FFSLDFLAGS="" +need_inet_aton="yes" +extralibs="$extralibs -lsocket -lnsl" +;; +NetBSD) +v4l="no" +v4l2="no" +bktr="yes" +audio_oss="yes" +dv1394="no" +make="gmake" +LDFLAGS="$LDFLAGS -export-dynamic" +case `uname -r` in +2.*) extralibs="$extralibs -lossaudio" +;; +esac +;; +OpenBSD) +v4l="no" +v4l2="no" +bktr="yes" +audio_oss="yes" +dv1394="no" +make="gmake" +LIBOBJFLAGS="\$(PIC)" +LDFLAGS="$LDFLAGS -export-dynamic -pthread" +LDCONFIG="ldconfig 
-m \$(libdir)" +extralibs="$extralibs -lossaudio" +;; +FreeBSD) +v4l="no" +v4l2="no" +bktr="yes" +audio_oss="yes" +dv1394="no" +make="gmake" +CFLAGS="$CFLAGS -pthread" +LDFLAGS="$LDFLAGS -export-dynamic -pthread" +;; +BSD/OS) +v4l="no" +v4l2="no" +bktr="yes" +audio_oss="yes" +dv1394="no" +extralibs="-lpoll -lgnugetopt -lm" +make="gmake" +strip="strip -d" +installstrip="" +;; +Darwin) +cc="cc" +v4l="no" +v4l2="no" +audio_oss="no" +dv1394="no" +SHFLAGS="-dynamiclib -Wl,-single_module -Wl,-install_name,\$(libdir)/\$(SLIBNAME),-current_version,\$(SPPVERSION),-compatibility_version,\$(SPPVERSION)" +extralibs="" +darwin="yes" +strip="strip -x" +installstrip="" +LDFLAGS="-Wl,-dynamic,-search_paths_first" +SLIBSUF=".dylib" +SLIBNAME_WITH_FULLVERSION='$(SLIBPREF)$(NAME).$(LIBVERSION)$(SLIBSUF)' +SLIBNAME_WITH_MAJOR='$(SLIBPREF)$(NAME).$(LIBMAJOR)$(SLIBSUF)' +FFSLDFLAGS=-Wl,-bind_at_load +;; +MINGW32*) +# Note: the rest of the mingw32 config is done afterwards as mingw32 +# can be forced on the command line for Linux cross compilation. +mingw32="yes" +;; +CYGWIN*) +v4l="no" +v4l2="no" +audio_oss="yes" +dv1394="no" +vhook="no" +extralibs="" +cygwin="yes" +EXESUF=".exe" +;; +Linux) +LDFLAGS="$LDFLAGS -rdynamic" +;; +IRIX*) +ranlib="echo ignoring ranlib" +v4l="no" +v4l2="no" +audio_oss="no" +make="gmake" +;; +OS/2) +TMPE=$TMPE".exe" +ar="emxomfar -p128" +ranlib="echo ignoring ranlib" +strip="echo ignoring strip" +CFLAGS="$CFLAGS -Zomf" +LDFLAGS="-Zomf -Zstack 16384 -s" +SHFLAGS="-Zdll -Zomf" +FFSLDFLAGS="" +LIBPREF="" +LIBSUF=".lib" +SLIBPREF="" +SLIBSUF=".dll" +EXESUF=".exe" +extralibs="" +pkg_requires="" +v4l="no" +v4l2="no" +audio_oss="no" +dv1394="no" +ffserver="no" +vhook="no" +os2="yes" + +;; +*) ;; +esac + +# From MPlayer configure. We need TARGET_OS available +# to the Makefile, so it can distinguish between flavors +# of AltiVec on PowerPC. 
+TARGET_OS=`( uname -s ) 2>&1` + case "$TARGET_OS" in + Linux|FreeBSD|NetBSD|BSD/OS|OpenBSD|SunOS|QNX|Darwin|GNU|BeOS|MorphOS) + ;; + IRIX*) + TARGET_OS=IRIX + ;; + HP-UX*) + TARGET_OS=HP-UX + ;; + [cC][yY][gG][wW][iI][nN]*) + TARGET_OS=CYGWIN + ;; + *) + TARGET_OS="$TARGET_OS-UNKNOWN" + ;; + esac + +# find source path +source_path="`dirname $0`" +source_path_used="yes" +if test -z "$source_path" -o "$source_path" = "." ; then + source_path=`pwd` + source_path_used="no" +else + source_path="`cd \"$source_path\"; pwd`" +fi + +FFMPEG_CONFIGURATION=" " +for opt do + FFMPEG_CONFIGURATION="$FFMPEG_CONFIGURATION""$opt " +done + +CODEC_LIST=`grep 'register_avcodec(&[a-z]' $source_path/libavcodec/allcodecs.c | sed 's/.*&\(.*\)).*/\1/'` + +for opt do + case "$opt" in + --prefix=*) prefix=`echo $opt | cut -d '=' -f 2`; force_prefix=yes + ;; + --libdir=*) libdir=`echo $opt | cut -d '=' -f 2`; force_libdir=yes + ;; + --incdir=*) incdir=`echo $opt | cut -d '=' -f 2`; + ;; + --mandir=*) mandir=`echo $opt | cut -d '=' -f 2` + ;; + --source-path=*) source_path=`echo $opt | cut -d '=' -f 2` + ;; + --cross-prefix=*) cross_prefix=`echo $opt | cut -d '=' -f 2` + ;; + --cc=*) cc=`echo $opt | cut -d '=' -f 2-` + ;; + --make=*) make=`echo $opt | cut -d '=' -f 2` + ;; + --extra-cflags=*) CFLAGS="$CFLAGS ${opt#--extra-cflags=}" + ;; + --extra-ldflags=*) LDFLAGS="$LDFLAGS ${opt#--extra-ldflags=}" + ;; + --extra-libs=*) extralibs=${opt#--extra-libs=} + ;; + --build-suffix=*) BUILDSUF=${opt#--build-suffix=} + ;; + --cpu=*) cpu=`echo $opt | cut -d '=' -f 2` + ;; + --tune=*) tune=`echo $opt | cut -d '=' -f 2` + ;; + --powerpc-perf-enable) powerpc_perf="yes" + ;; + --disable-mmx) mmx="no" + ;; + --disable-iwmmxt) iwmmxt="no" + ;; + --disable-altivec) altivec="no" + ;; + --enable-gprof) gprof="yes" + ;; + --disable-v4l) v4l="no" + ;; + --disable-v4l2) v4l2="no" + ;; + --disable-bktr) bktr="no" + ;; + --disable-audio-oss) audio_oss="no" + ;; + --disable-audio-beos) audio_beos="no" + ;; + 
--disable-dv1394) dv1394="no" + ;; + --disable-network) network="no"; ffserver="no" + ;; + --disable-zlib) zlib="no" + ;; + --disable-lzo) lzo="no" + ;; + --enable-a52) a52="yes" + ;; + --enable-a52bin) a52bin="yes" + ;; + --enable-dts) dts="yes" + extralibs="$extralibs -ldts" + ;; + --enable-pp) pp="yes" + ;; + --enable-libgsm) libgsm="yes" + extralibs="$extralibs -lgsm" + ;; + --enable-mp3lame) mp3lame="yes" + extralibs="$extralibs -lmp3lame" + ;; + --enable-libogg) libogg="yes" + extralibs="$extralibs -logg" + pkg_requires="$pkg_requires ogg >= 1.1" + ;; + --enable-vorbis) vorbis="yes" + extralibs="$extralibs -lvorbis -lvorbisenc" + pkg_requires="$pkg_requires vorbis vorbisenc" + ;; + --enable-theora) theora="yes" + extralibs="$extralibs -ltheora" + pkg_requires="$pkg_requires theora" + ;; + --enable-faad) faad="yes" + extralibs="$extralibs -lfaad" + ;; + --enable-faadbin) faadbin="yes" + ;; + --enable-faac) faac="yes" + extralibs="$extralibs -lfaac" + ;; + --enable-xvid) xvid="yes" + extralibs="$extralibs -lxvidcore" + ;; + --enable-x264) x264="yes" + extralibs="$extralibs -lx264" + ;; + --enable-dc1394) dc1394="yes" + extralibs="$extralibs -ldc1394_control -lraw1394" + pkg_requires="$pkg_requires libraw1394" + ;; + --disable-vhook) vhook="no" + ;; + --disable-simple_idct) simpleidct="no" + ;; + --enable-mingw32) mingw32="yes" + ;; + --enable-mingwce) mingwce="yes" + ;; + --enable-static) lstatic="yes" + ;; + --disable-static) lstatic="no" + ;; + --enable-shared) lshared="yes" + ;; + --disable-shared) lshared="no" + ;; + --disable-debug) debug="no" + ;; + --disable-opts) optimize="no" + ;; + --disable-mpegaudio-hp) mpegaudio_hp="no" + ;; + --disable-protocols) protocols="no"; network="no"; ffserver="no" + ;; + --disable-ffserver) ffserver="no" + ;; + --disable-ffplay) ffplay="no" + ;; + --enable-small) optimize="small" + ;; + --enable-amr_nb) amr_nb="yes" + ;; + --enable-amr_nb-fixed) amr_nb_fixed="yes" + ;; + --enable-amr_wb) amr_wb="yes" + ;; + 
--enable-amr_if2) amr_if2="yes" + ;; + --enable-sunmlib) sunmlib="yes" + ;; + --enable-pthreads) pthreads="yes" + ;; + --enable-gpl) gpl="yes" + ;; + --enable-memalign-hack) memalignhack="yes" + ;; + --disable-strip) dostrip="no" + ;; + --enable-encoder=*) CODEC_LIST="$CODEC_LIST ${opt#--enable-encoder=}_encoder" + ;; + --enable-decoder=*) CODEC_LIST="$CODEC_LIST ${opt#--enable-decoder=}_decoder" + ;; + --disable-encoder=*) CODEC_LIST="`echo $CODEC_LIST | sed -e \"s#${opt#--disable-encoder=}_encoder##\"`" + ;; + --disable-decoder=*) CODEC_LIST="`echo $CODEC_LIST | sed -e \"s#${opt#--disable-decoder=}_decoder##\"`" + ;; + --disable-encoders) CODEC_LIST="`echo $CODEC_LIST | sed 's/[-_a-zA-Z0-9]*encoder//g'`" + ;; + --disable-decoders) CODEC_LIST="`echo $CODEC_LIST | sed 's/[-_a-zA-Z0-9]*decoder//g'`" + ;; + --disable-muxers) muxers="no"; ffserver="no" + ;; + --disable-demuxers) demuxers="no" + ;; + *) + echo "Unknown option \"$opt\"." + echo "See $0 --help for available options." + exit 1 + ;; + esac +done + +# we need to build at least one lib type +if test "$lstatic" = "no" && test "$lshared" = "no" ; then + cat < $TMPC << EOF + #include + int main( void ) { return 0; } +EOF + + if $cc $CFLAGS -o $TMPE $TMPC 2> /dev/null ; then + cat > $TMPC << EOF + #include + #ifndef FAAD2_VERSION + ok faad1 + #endif + int main( void ) { return 0; } +EOF + if $cc $CFLAGS -o $TMPE $TMPC 2> /dev/null ; then + echo "FAAD2 is under GPL and --enable-gpl is not specified." + fail="yes" + fi + else + faad="no" + faadbin="no" + echo "FAAD test failed." 
+ fi + fi + + + if test "$fail" = "yes"; then + exit 1 + fi +fi + +# compute MMX state +if test $mmx = "default"; then + if test $cpu = "x86" -o $cpu = "x86_64"; then + mmx="yes" + else + mmx="no" + fi +fi + +# check iwmmxt support +if test $iwmmxt = "default" -a $cpu = "armv4l"; then + cat > $TMPC << EOF + int main(void) { + __asm__ __volatile__ ("wunpckelub wr6, wr4"); + } +EOF + + iwmmxt=no + if ${cross_prefix}${cc} -o $TMPO $TMPC 2> /dev/null ; then + iwmmxt=yes + fi +fi + +#Darwin CC versions +needmdynamicnopic="no" +if test $targetos = Darwin; then + if test -n "`$cc -v 2>&1 | grep xlc`"; then + CFLAGS="$CFLAGS -qpdf2 -qlanglvl=extc99 -qmaxmem=-1 -qarch=auto -qtune=auto" + else + gcc_version="`$cc -v 2>&1 | grep version | cut -d ' ' -f3-`" + case "$gcc_version" in + *2.95*) + CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" + ;; + *[34].*) + CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer -force_cpusubtype_ALL -Wno-sign-compare" + if test "$lshared" = no; then + needmdynamicnopic="yes" + fi + ;; + *) + CFLAGS="$CFLAGS -no-cpp-precomp -pipe -fomit-frame-pointer" + if test "$lshared" = no; then + needmdynamicnopic="yes" + fi + ;; + esac + fi +fi + +# Can only do AltiVec on PowerPC +if test $altivec = "default"; then + if test $cpu = "powerpc"; then + altivec="yes" + else + altivec="no" + fi +fi + +# Add processor-specific flags +TUNECPU="generic" +POWERPCMODE="32bits" +if test $tune != "generic"; then + case $tune in + 601|ppc601|PowerPC601) + CFLAGS="$CFLAGS -mcpu=601" + if test $altivec = "yes"; then + echo "WARNING: Tuning for PPC601 but AltiVec enabled!"; + fi + TUNECPU=ppc601 + ;; + 603*|ppc603*|PowerPC603*) + CFLAGS="$CFLAGS -mcpu=603" + if test $altivec = "yes"; then + echo "WARNING: Tuning for PPC603 but AltiVec enabled!"; + fi + TUNECPU=ppc603 + ;; + 604*|ppc604*|PowerPC604*) + CFLAGS="$CFLAGS -mcpu=604" + if test $altivec = "yes"; then + echo "WARNING: Tuning for PPC604 but AltiVec enabled!"; + fi + TUNECPU=ppc604 + ;; + 
G3|g3|75*|ppc75*|PowerPC75*) + CFLAGS="$CFLAGS -mcpu=750 -mtune=750 -mpowerpc-gfxopt" + if test $altivec = "yes"; then + echo "WARNING: Tuning for PPC75x but AltiVec enabled!"; + fi + TUNECPU=ppc750 + ;; + G4|g4|745*|ppc745*|PowerPC745*) + CFLAGS="$CFLAGS -mcpu=7450 -mtune=7450 -mpowerpc-gfxopt" + if test $altivec = "no"; then + echo "WARNING: Tuning for PPC745x but AltiVec disabled!"; + fi + TUNECPU=ppc7450 + ;; + 74*|ppc74*|PowerPC74*) + CFLAGS="$CFLAGS -mcpu=7400 -mtune=7400 -mpowerpc-gfxopt" + if test $altivec = "no"; then + echo "WARNING: Tuning for PPC74xx but AltiVec disabled!"; + fi + TUNECPU=ppc7400 + ;; + G5|g5|970|ppc970|PowerPC970|power4*|Power4*) + CFLAGS="$CFLAGS -mcpu=970 -mtune=970 -mpowerpc-gfxopt -mpowerpc64" + if test $altivec = "no"; then + echo "WARNING: Tuning for PPC970 but AltiVec disabled!"; + fi + TUNECPU=ppc970 + POWERPCMODE="64bits" + ;; + i[3456]86|pentium|pentiumpro|pentium-mmx|pentium[234]|prescott|k6|k6-[23]|athlon|athlon-tbird|athlon-4|athlon-[mx]p|winchip-c6|winchip2|c3|nocona|athlon64|k8|opteron|athlon-fx) + CFLAGS="$CFLAGS -march=$tune" + ;; + *) + echo "WARNING: Unknown CPU \"$tune\", ignored." 
+ ;; + esac +fi + +# AltiVec flags: The FSF version of GCC differs from the Apple version +if test $cpu = "powerpc"; then + if test $altivec = "yes"; then + if test -n "`$cc -v 2>&1 | grep version | grep Apple`"; then + CFLAGS="$CFLAGS -faltivec" + else + CFLAGS="$CFLAGS -maltivec -mabi=altivec" + fi + fi +fi + +# check if we have +cat > $TMPC << EOF +#include +int main( void ) { return 0; } +EOF + +_altivec_h="no" +if $cc $CFLAGS -o $TMPE $TMPC 2> /dev/null ; then +_altivec_h="yes" +fi + +# check if our compiler supports Motorola AltiVec C API +if test $altivec = "yes"; then +if test $_altivec_h = "yes"; then +cat > $TMPC << EOF +#include +int main(void) { + vector signed int v1, v2, v3; + v1 = vec_add(v2,v3); + return 0; +} +EOF +else +cat > $TMPC << EOF +int main(void) { + vector signed int v1, v2, v3; + v1 = vec_add(v2,v3); + return 0; +} +EOF +fi +$cc $CFLAGS -o $TMPE $TMPC 2> /dev/null || altivec="no" +fi + +# mmi only available on mips +if test $mmi = "default"; then + if test $cpu = "mips"; then + mmi="yes" + else + mmi="no" + fi +fi + +# check if our compiler supports mmi +if test $mmi = "yes"; then +cat > $TMPC << EOF +int main(void) { + __asm__ ("lq \$2, 0(\$2)"); + return 0; +} +EOF +$cc -o $TMPE $TMPC 2> /dev/null || mmi="no" +fi + +if test "$mingw32" = "yes" -o "$mingwce" = "yes"; then + if test "$lshared" = "yes" && test "$lstatic" = "yes" ; then + cat < $TMPC << EOF +#include +int main(int argc, char ** argv){ + volatile uint32_t i=0x01234567; + return (*((uint8_t*)(&i))) == 0x67; +} +EOF + +if $cc -o $TMPE $TMPC 2>/dev/null ; then +$TMPE && bigendian="yes" +else +echo big/little test failed +fi + +else + +# programs cannot be launched if cross compiling, so make a static guess +if test "$cpu" = "powerpc" -o "$cpu" = "mips" ; then + bigendian="yes" +fi + +fi + +# --- +# *inttypes.h* test +cat > $TMPC << EOF +#include +int main(int argc, char ** argv){ + return 0; +} +EOF + +$cc -o $TMPE $TMPC 2>/dev/null || inttypes="no" + +# --- +# *int_fast* test 
+# Feature probes, configuration summary, and the first part of the
+# config.mak / config.h ($TMPH) output. Each probe writes a small C program
+# to $TMPC, tries to build it with $cc, and records the outcome in a shell
+# variable that is reported and emitted further down. Statement order
+# matters: $TMPC is reused and CFLAGS/extralibs accumulate.
+# NOTE(review): every "#include" line inside the probe programs below has no
+# header name in this copy; the names were presumably lost when the text was
+# extracted - restore them before relying on any probe result.
+cat > $TMPC << EOF
+#include
+int main(int argc, char ** argv){
+    volatile uint_fast64_t i=0x01234567;
+    return 0;
+}
+EOF
+
+$cc -o $TMPE $TMPC 2>/dev/null || emu_fast_int="yes"
+
+# ---
+# check availability of some header files
+
+cat > $TMPC << EOF
+#include
+int main( void ) { return 0; }
+EOF
+
+_memalign=no
+_malloc_h=no
+if $cc -o $TMPE $TMPC 2> /dev/null ; then
+_malloc_h=yes
+_memalign=yes
+# check for memalign - atmos
+cat > $TMPC << EOF
+#include
+#include
+int main ( void ) {
+char *string = NULL;
+string = memalign(64, sizeof(char));
+return 0;
+}
+EOF
+$cc -o $TMPE $TMPC 2> /dev/null || _memalign=no
+fi
+
+# NOTE(review): the condition tests $mmx although the message says SSE.
+if test "$_memalign" = "no" -a "$mmx" = "yes" -a "$memalignhack" != "yes"; then
+    echo "Error, no memalign() but SSE enabled, disable it or use --enable-memalign-hack."
+    exit 1
+fi
+
+cat > $TMPC << EOF
+#include
+int main( void ) { localtime_r(NULL, NULL); }
+EOF
+
+localtime_r=no
+if $cc -o $TMPE $TMPC 2> /dev/null ; then
+    localtime_r=yes
+fi
+
+# Only a compile/link test: the run of $TMPE below is commented out, so the
+# value returned by the probe's main() is never looked at.
+if test "$zlib" = "yes"; then
+# check for zlib - mmu_man
+cat > $TMPC << EOF
+#include
+int main ( void ) {
+if (zlibVersion() != ZLIB_VERSION)
+    puts("zlib version differs !!!");
+    return 1;
+return 0;
+}
+EOF
+$cc $CFLAGS $LDFLAGS -o $TMPE $TMPC -lz 2> /dev/null || zlib="no"
+# $TMPE 2> /dev/null > /dev/null || zlib="no"
+# XXX: more tests needed - runtime test
+fi
+if test "$zlib" = "yes"; then
+extralibs="$extralibs -lz"
+fi
+
+# lzo is probed only when both lzo and gpl are "yes"; otherwise it is forced off.
+if test "$lzo" = "yes" -a "$gpl" = "yes"; then
+# check for liblzo
+cat > $TMPC << EOF
+#include
+int main ( void ) {
+lzo_init();
+return 0;
+}
+EOF
+$cc $CFLAGS $LDFLAGS -o $TMPE $TMPC -llzo 2> /dev/null || lzo="no"
+else
+lzo="no"
+fi
+if test "$lzo" = "yes"; then
+extralibs="$extralibs -llzo"
+fi
+
+# test for lrintf in math.h
+cat > $TMPC << EOF
+#define _ISOC9X_SOURCE 1
+#include
+int main( void ) { return (lrintf(3.999f) > 0)?0:1; }
+EOF
+
+have_lrintf="no"
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC $extralibs 2> /dev/null ; then
+    have_lrintf="yes"
+    # allanc@chickenandporn.com: cannot execute cross-compiled
+    # code on the host. Only execute if not cross-compiling.
+    if test -z "$cross_prefix" ; then
+        $TMPE 2> /dev/null > /dev/null || have_lrintf="no"
+    fi
+fi
+
+# Pick the first spelling of "restrict" that $cc accepts; _restrict stays
+# empty when none compiles.
+_restrict=
+for restrict_keyword in restrict __restrict__ __restrict; do
+    echo "void foo(char * $restrict_keyword p);" > $TMPC
+    if $cc -c -o $TMPO $TMPC 2> /dev/null; then
+        _restrict=$restrict_keyword
+        break;
+    fi
+done
+
+# test gcc version to see if vector builtins can be used
+# currently only used on i386 for MMX builtins
+cat > $TMPC << EOF
+#include
+int main(void) {
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
+return 0;
+#else
+#error no vector builtins
+#endif
+}
+EOF
+
+builtin_vector=no
+if $cc -msse -o $TMPO $TMPC 2> /dev/null ; then
+    builtin_vector=yes
+fi
+
+# test for mm3dnow.h
+cat > $TMPC << EOF
+#include
+int main(void) {
+return 0;
+}
+EOF
+
+mm3dnow=no
+if $cc -march=athlon -o $TMPO $TMPC 2> /dev/null ; then
+    mm3dnow=yes
+fi
+
+# Probe for -Wdeclaration-after-statement
+# (attempted only when $cc is exactly the string "gcc")
+if test "$cc" = "gcc"; then
+    cat > $TMPC << EOF
+    int main( void ) { return 0; }
+EOF
+
+    if $cc -Wdeclaration-after-statement -Werror -o $TMPE $TMPC 2> /dev/null ; then
+        CFLAGS="$CFLAGS -Wdeclaration-after-statement"
+    fi
+fi
+
+# dlopen/dlfcn.h probing
+# Two probe programs (with and without the include line), each linked with
+# and without -ldl. ldl is cleared whenever a link without -ldl succeeds.
+# dlfcn and dlopen are only ever set to "yes" here; nothing in this part of
+# the script gives them a "no" default.
+
+cat > $TMPC << EOF
+#include
+int main( void ) { return (int) dlopen("foo", 0); }
+EOF
+
+ldl=-ldl
+
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC -ldl > /dev/null 2>&1 ; then
+dlfcn=yes
+dlopen=yes
+fi
+
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC > /dev/null 2>&1 ; then
+dlfcn=yes
+dlopen=yes
+ldl=""
+fi
+
+cat > $TMPC << EOF
+int main( void ) { return (int) dlopen("foo", 0); }
+EOF
+
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC -ldl > /dev/null 2>&1 ; then
+dlopen=yes
+fi
+
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC > /dev/null 2>&1 ; then
+dlopen=yes
+ldl=""
+fi
+
+if test "$vhook" = "default" ; then
+    vhook="$dlopen"
+fi
+
+if test "$vhook" = "yes" -o "$a52bin" = "yes" -o "$faadbin" = "yes"; then
+    extralibs="$extralibs $ldl"
+fi
+
+
+##########################################
+# imlib check
+
+cat > $TMPC << EOF
+#include
+#include
+int main( void ) { return (int) imlib_load_font("foo"); }
+EOF
+
+imlib2=no
+if $cc $CFLAGS $LDFLAGS -o $TMPE $TMPC -lImlib2 -lm > /dev/null 2>&1 ; then
+imlib2=yes
+fi
+
+##########################################
+# FreeType check
+
+cat > $TMPC << EOF
+#include
+int main( void ) { return (int) FT_Init_FreeType(0); }
+EOF
+
+freetype2=no
+if test "x$targetos" != "xBeOS"; then
+    if (freetype-config --version) >/dev/null 2>&1 ; then
+        if $cc -o $TMPE $TMPC `freetype-config --cflags` `freetype-config --libs` > /dev/null 2>&1 ; then
+            freetype2=yes
+        fi
+    fi
+fi
+
+##########################################
+# SDL check
+
+cat > $TMPC << EOF
+#include
+#undef main /* We don't want SDL to override our main() */
+int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
+EOF
+
+sdl_too_old=no
+sdl=no
+SDL_CONFIG="${cross_prefix}sdl-config"
+# The version string is reduced to its digits and compared numerically with 121.
+if ("${SDL_CONFIG}" --version) >/dev/null 2>&1 ; then
+if $cc -o $TMPE `"${SDL_CONFIG}" --cflags` $TMPC `"${SDL_CONFIG}" --libs` > /dev/null 2>&1 ; then
+_sdlversion=`"${SDL_CONFIG}" --version | sed 's/[^0-9]//g'`
+if test "$_sdlversion" -lt 121 ; then
+sdl_too_old=yes
+else
+sdl=yes
+fi
+fi
+fi
+
+##########################################
+# texi2html check
+
+texi2html=no
+if (texi2html -version) >/dev/null 2>&1; then
+texi2html=yes
+fi
+
+if test "$network" = "yes" ; then
+##########################################
+# IPv6 check
+
+cat > $TMPC << EOF
+#include
+#include
+#include
+#include
+int main( void ) {
+    struct sockaddr_storage saddr;
+    struct ipv6_mreq mreq6;
+    getaddrinfo(0,0,0,0);
+    getnameinfo(0,0,0,0,0,0,0);
+    IN6_IS_ADDR_MULTICAST((const struct in6_addr *)0);
+}
+EOF
+
+ipv6=no
+if $cc -o $TMPE $TMPC > /dev/null 2>&1 ; then
+ipv6=yes
+fi
+fi
+
+if test "$v4l2" = "yes"; then
+# check for video4linux2 --- V4L2_PIX_FMT_YUV420
+cat > $TMPC << EOF
+#include
+#include
+int dummy = V4L2_PIX_FMT_YUV420;
+struct v4l2_buffer dummy1;
+EOF
+$cc -c -o $TMPE $TMPC 2> /dev/null || v4l2="no"
+fi
+
+# Warning flags are prepended only when the "version" line of `$cc -v`
+# contains "gcc".
+case "`$cc -v 2>&1 | grep version`" in
+    *gcc*)
+        CFLAGS="-Wall -Wno-switch $CFLAGS"
+        ;;
+    *)
+        ;;
+esac
+
+if test "$sdl" = "no" ; then
+    ffplay=no
+fi
+
+if test "$debug" = "yes"; then
+    CFLAGS="-g $CFLAGS"
+fi
+
+if test "$optimize" = "small"; then
+#   CFLAGS=${CFLAGS//-O3/-Os}
+    CFLAGS="$CFLAGS -Os"
+fi
+
+if test "$optimize" = "yes"; then
+    if test -n "`$cc -v 2>&1 | grep xlc`"; then
+        CFLAGS="$CFLAGS -O5"
+        LDFLAGS="$LDFLAGS -O5"
+    else
+        CFLAGS="-O3 $CFLAGS"
+    fi
+fi
+
+# PIC flags for shared library objects where they are needed
+if test "$lshared" = "yes" ; then
+    # LIBOBJFLAGS may have already been set in the OS configuration
+    if test -z "$LIBOBJFLAGS" ; then
+        if test "$cpu" = "x86_64" -o "$cpu" = "ia64" -o "$cpu" = "alpha" ; then
+            LIBOBJFLAGS="\$(PIC)"
+        fi
+    fi
+fi
+
+# Install directories that were not given on the command line default to
+# locations under $prefix.
+if test x"$bindir" = x""; then
+bindir="${prefix}/bin"
+fi
+
+if test x"$libdir" = x""; then
+libdir="${prefix}/lib"
+fi
+
+if test x"$incdir" = x""; then
+incdir="${prefix}/include/ffmpeg"
+fi
+
+if test x"$mandir" = x""; then
+mandir="${prefix}/man"
+fi
+
+# Human-readable summary of the resulting configuration.
+echo "install prefix $prefix"
+echo "source path $source_path"
+echo "C compiler $cc"
+echo "make $make"
+echo "CPU $cpu ($tune)"
+if test "$BUILDSUF" != ""; then
+echo "build suffix $BUILDSUF"
+fi
+echo "big-endian $bigendian"
+echo "inttypes.h $inttypes"
+echo "broken inttypes.h $emu_fast_int"
+if test $cpu = "x86" -o $cpu = "x86_64"; then
+echo "MMX enabled $mmx"
+echo "Vector Builtins $builtin_vector"
+echo "3DNow! Builtins $mm3dnow"
+fi
+if test $cpu = "armv4l"; then
+echo "IWMMXT enabled $iwmmxt"
+fi
+if test $cpu = "mips"; then
+echo "MMI enabled $mmi"
+fi
+if test $cpu = "powerpc"; then
+echo "AltiVec enabled $altivec"
+fi
+echo "gprof enabled $gprof"
+echo "zlib enabled $zlib"
+echo "lzo enabled $lzo"
+echo "libgsm enabled $libgsm"
+echo "mp3lame enabled $mp3lame"
+echo "libogg enabled $libogg"
+echo "Vorbis enabled $vorbis"
+echo "Theora enabled $theora"
+echo "FAAD enabled $faad"
+echo "faadbin enabled $faadbin"
+echo "FAAC enabled $faac"
+echo "XviD enabled $xvid"
+echo "x264 enabled $x264"
+echo "a52 support $a52"
+echo "a52 dlopened $a52bin"
+echo "DTS support $dts"
+echo "pp support $pp"
+echo "debug symbols $debug"
+echo "strip symbols $dostrip"
+echo "optimize $optimize"
+echo "static $lstatic"
+echo "shared $lshared"
+echo "video hooking $vhook"
+echo "SDL support $sdl"
+if test $sdl_too_old = "yes"; then
+echo "-> Your SDL version is too old - please upgrade to have FFplay/SDL support."
+fi
+
+if test "$vhook" = "yes" ; then
+echo "Imlib2 support $imlib2"
+echo "FreeType support $freetype2"
+fi
+echo "Sun medialib support" $sunmlib
+echo "pthreads support" $pthreads
+echo "AMR-NB float support" $amr_nb
+echo "AMR-NB fixed support" $amr_nb_fixed
+echo "AMR-WB float support" $amr_wb
+echo "AMR-WB IF2 support" $amr_if2
+echo "network support $network"
+if test "$network" = "yes" ; then
+echo "IPv6 support $ipv6"
+fi
+if test "$gpl" = "no" ; then
+echo "License: LGPL"
+else
+echo "License: GPL"
+fi
+
+echo "Creating config.mak and config.h..."
+
+# config.log is appended to; config.mak and $TMPH are started afresh with ">"
+# and then appended to by everything that follows.
+date >> config.log
+echo " $0 $FFMPEG_CONFIGURATION" >> config.log
+echo "# Automatically generated by configure - do not modify!" > config.mak
+echo "/* Automatically generated by configure - do not modify! */" > $TMPH
+echo "#define FFMPEG_CONFIGURATION "'"'"$FFMPEG_CONFIGURATION"'"' >> $TMPH
+
+echo "prefix=\$(DESTDIR)$prefix" >> config.mak
+echo "libdir=\$(DESTDIR)$libdir" >> config.mak
+echo "incdir=\$(DESTDIR)$incdir" >> config.mak
+echo "bindir=\$(DESTDIR)$bindir" >> config.mak
+echo "mandir=\$(DESTDIR)$mandir" >> config.mak
+echo "MAKE=$make" >> config.mak
+echo "CC=$cc" >> config.mak
+echo "AR=$ar" >> config.mak
+echo "RANLIB=$ranlib" >> config.mak
+if test "$dostrip" = "yes" ; then
+echo "STRIP=$strip" >> config.mak
+echo "INSTALLSTRIP=$installstrip" >> config.mak
+else
+echo "STRIP=echo ignoring strip" >> config.mak
+echo "INSTALLSTRIP=" >> config.mak
+fi
+
+# SHCFLAGS is a copy of CFLAGS without -mdynamic-no-pic, used when building
+# shared modules on OS/X (vhook/Makefile).
+SHCFLAGS=$CFLAGS
+if test "$needmdynamicnopic" = yes; then
+    CFLAGS="$CFLAGS -mdynamic-no-pic"
+fi
+
+echo "OPTFLAGS=$CFLAGS" >> config.mak
+echo "SHCFLAGS=$SHCFLAGS">>config.mak
+echo "LDFLAGS=$LDFLAGS" >> config.mak
+echo "LDCONFIG=$LDCONFIG" >> config.mak
+echo "FFSLDFLAGS=$FFSLDFLAGS" >> config.mak
+echo "SHFLAGS=$SHFLAGS" >> config.mak
+echo "LIBOBJFLAGS=$LIBOBJFLAGS" >> config.mak
+echo "BUILD_STATIC=$lstatic" >> config.mak
+echo "BUILDSUF=$BUILDSUF" >> config.mak
+echo "LIBPREF=$LIBPREF" >> config.mak
+echo "LIBSUF=\${BUILDSUF}$LIBSUF" >> config.mak
+if test "$lstatic" = "yes" ; then
+    echo "LIB=$LIB" >> config.mak
+else # Some Make complain if this variable does not exist.
+    echo "LIB=" >> config.mak
+fi
+echo "SLIBPREF=$SLIBPREF" >> config.mak
+echo "SLIBSUF=\${BUILDSUF}$SLIBSUF" >> config.mak
+echo "EXESUF=\${BUILDSUF}$EXESUF" >> config.mak
+echo "TARGET_OS=$TARGET_OS" >> config.mak
+# One TARGET_ARCH_* make variable and one ARCH_* define per recognised $cpu;
+# sparc64 additionally emits the plain sparc pair.
+if test "$cpu" = "x86" ; then
+    echo "TARGET_ARCH_X86=yes" >> config.mak
+    echo "#define ARCH_X86 1" >> $TMPH
+elif test "$cpu" = "x86_64" ; then
+    echo "TARGET_ARCH_X86_64=yes" >> config.mak
+    echo "#define ARCH_X86_64 1" >> $TMPH
+elif test "$cpu" = "armv4l" ; then
+    echo "TARGET_ARCH_ARMV4L=yes" >> config.mak
+    echo "#define ARCH_ARMV4L 1" >> $TMPH
+elif test "$cpu" = "alpha" ; then
+    echo "TARGET_ARCH_ALPHA=yes" >> config.mak
+    echo "#define ARCH_ALPHA 1" >> $TMPH
+elif test "$cpu" = "sparc64" ; then
+    echo "TARGET_ARCH_SPARC64=yes" >> config.mak
+    echo "#define ARCH_SPARC64 1" >> $TMPH
+    echo "TARGET_ARCH_SPARC=yes" >> config.mak
+    echo "#define ARCH_SPARC 1" >> $TMPH
+elif test "$cpu" = "sparc" ; then
+    echo "TARGET_ARCH_SPARC=yes" >> config.mak
+    echo "#define ARCH_SPARC 1" >> $TMPH
+elif test "$cpu" = "powerpc" ; then
+    echo "TARGET_ARCH_POWERPC=yes" >> config.mak
+    echo "#define ARCH_POWERPC 1" >> $TMPH
+    if test $POWERPCMODE = "32bits"; then
+        echo "#define POWERPC_MODE_32BITS 1" >> $TMPH
+    else
+        echo "#define POWERPC_MODE_64BITS 1" >> $TMPH
+    fi
+    if test "$powerpc_perf" = "yes"; then
+        echo "#define POWERPC_PERFORMANCE_REPORT 1" >> $TMPH
+    fi
+elif test "$cpu" = "mips" ; then
+    echo "TARGET_ARCH_MIPS=yes" >> config.mak
+    echo "#define ARCH_MIPS 1" >> $TMPH
+elif test "$cpu" = "sh4" ; then
+    echo "TARGET_ARCH_SH4=yes" >> config.mak
+    echo "#define ARCH_SH4 1" >> $TMPH
+elif test "$cpu" = "parisc" ; then
+    echo "TARGET_ARCH_PARISC=yes" >> config.mak
+    echo "#define ARCH_PARISC 1" >> $TMPH
+elif test "$cpu" = "s390" ; then
+    echo "TARGET_ARCH_S390=yes" >> config.mak
+    echo "#define ARCH_S390 1" >> $TMPH
+elif test "$cpu" = "m68k" ; then
+    echo "TARGET_ARCH_M68K=yes" >> config.mak
+    echo "#define ARCH_M68K 1" >> $TMPH
+elif test "$cpu" = "ia64" ; then
+    echo "TARGET_ARCH_IA64=yes" >> config.mak
+    echo "#define ARCH_IA64 1" >> $TMPH
+fi
+echo "#define TUNECPU $TUNECPU" >> $TMPH
+if test "$bigendian" = "yes" ; then
+    echo "WORDS_BIGENDIAN=yes" >> config.mak
+    echo "#define WORDS_BIGENDIAN 1" >> $TMPH
+fi
+if test "$inttypes" != "yes" ; then
+    echo "#define EMULATE_INTTYPES 1" >> $TMPH
+fi
+if test "$emu_fast_int" = "yes" ; then
+    echo "#define EMULATE_FAST_INT 1" >> $TMPH
+fi
+if test "$mmx" = "yes" ; then
+    echo "TARGET_MMX=yes" >> config.mak
+    echo "#define HAVE_MMX 1" >> $TMPH
+    echo "#define __CPU__ 586" >> $TMPH
+fi
+if test "$builtin_vector" = "yes" ; then
+    echo "TARGET_BUILTIN_VECTOR=yes" >> config.mak
+    echo "#define HAVE_BUILTIN_VECTOR 1" >> $TMPH
+fi
+if test "$mm3dnow" = "yes" ; then
+    echo "TARGET_BUILTIN_3DNOW=yes" >> config.mak
+    echo "#define HAVE_MM3DNOW 1" >> $TMPH
+fi
+if test "$iwmmxt" = "yes" ; then
+    echo "TARGET_IWMMXT=yes" >> config.mak
+    echo "#define HAVE_IWMMXT 1" >> $TMPH
+fi
+if test "$mmi" = "yes" ; then
+    echo "TARGET_MMI=yes" >> config.mak
+    echo "#define HAVE_MMI 1" >> $TMPH
+fi
+if test "$altivec" = "yes" ; then
+    echo "TARGET_ALTIVEC=yes" >> config.mak
+    echo "#define HAVE_ALTIVEC 1" >> $TMPH
+    echo "// Enable the next line to use the reference C code instead of AltiVec" >> $TMPH
+    echo "// #define ALTIVEC_USE_REFERENCE_C_CODE 1" >> $TMPH
+    if test "$_altivec_h" = "yes" ; then
+        echo "#define HAVE_ALTIVEC_H 1" >> $TMPH
+    else
+        echo "#undef HAVE_ALTIVEC_H" >> $TMPH
+    fi
+fi
+if test "$gprof" = "yes" ; then
+    echo "TARGET_GPROF=yes" >> config.mak
+    echo "#define HAVE_GPROF 1" >> $TMPH
+fi
+if test "$localtime_r" = "yes" ; then
+    echo "#define HAVE_LOCALTIME_R 1" >> $TMPH
+fi
+if test "$imlib2" = "yes" ; then
+    echo "HAVE_IMLIB2=yes" >> config.mak
+fi
+if test "$freetype2" = "yes" ; then
+    echo "HAVE_FREETYPE2=yes" >> config.mak
+fi
+if test "$sunmlib" = "yes" ; then
+    echo "HAVE_MLIB=yes" >> config.mak
+    echo "#define HAVE_MLIB 1" >> $TMPH
+    extralibs="$extralibs -lmlib"
+fi
+if test "$pthreads" = "yes" ; then
+    echo "HAVE_PTHREADS=yes" >> config.mak
+    echo "#define HAVE_PTHREADS 1" >> $TMPH
+    echo "#define HAVE_THREADS 1" >> $TMPH
+    if test $targetos != FreeBSD -a $targetos != OpenBSD ; then
+        extralibs="$extralibs -lpthread"
+    fi
+fi
+if test "$sdl" = "yes" ; then
+    echo "CONFIG_SDL=yes" >> config.mak
+    echo "SDL_LIBS=`"${SDL_CONFIG}" --libs`" >> config.mak
+    echo "SDL_CFLAGS=`"${SDL_CONFIG}" --cflags`" >> config.mak
+fi
+if test "$texi2html" = "yes"; then
+    echo "BUILD_DOC=yes" >> config.mak
+fi
+if test "$have_lrintf" = "yes" ; then
+    echo "#define HAVE_LRINTF 1" >> $TMPH
+fi
+if test "$vhook" = "yes" ; then
+    echo "BUILD_VHOOK=yes" >> config.mak
+    echo "#define HAVE_VHOOK 1" >> $TMPH
+fi
+
+# Library versions are taken from the #define lines in the headers, keeping
+# only digits and dots.
+# NOTE(review): pp_version is computed here but nothing below in this section
+# reads it; SPPMAJOR/SPPVERSION are derived from lavc_version - confirm that
+# is intended.
+pp_version=`grep '#define LIBPOSTPROC_VERSION ' "$source_path/libavcodec/libpostproc/postprocess.h" | sed 's/[^0-9\.]//g'`
+lavc_version=`grep '#define LIBAVCODEC_VERSION ' "$source_path/libavcodec/avcodec.h" | sed 's/[^0-9\.]//g'`
+lavf_version=`grep '#define LIBAVFORMAT_VERSION ' "$source_path/libavformat/avformat.h" | sed 's/[^0-9\.]//g'`
+lavu_version=`grep '#define LIBAVUTIL_VERSION ' "$source_path/libavutil/avutil.h" | sed 's/[^0-9\.]//g'`
+
+
+
+if test "$lshared" = "yes" ; then
+    echo "#define BUILD_SHARED_AV 1" >> $TMPH
+    echo "BUILD_SHARED=yes" >> config.mak
+    echo "PIC=-fPIC -DPIC" >> config.mak
+    echo "SPPMAJOR=${lavc_version%%.*}" >> config.mak
+    echo "SPPVERSION=$lavc_version" >> config.mak
+    echo "LAVCMAJOR=${lavc_version%%.*}" >> config.mak
+    echo "LAVCVERSION=$lavc_version" >> config.mak
+    echo "LAVFMAJOR=${lavf_version%%.*}" >> config.mak
+    echo "LAVFVERSION=$lavf_version" >> config.mak
+    echo "LAVUMAJOR=${lavu_version%%.*}" >> config.mak
+    echo "LAVUVERSION=$lavu_version" >> config.mak
+    echo "SLIBNAME=${SLIBNAME}" >> config.mak
+    echo "SLIBNAME_WITH_VERSION=${SLIBNAME_WITH_VERSION}" >> config.mak
+    echo "SLIBNAME_WITH_MAJOR=${SLIBNAME_WITH_MAJOR}" >> config.mak
+fi
+echo "EXTRALIBS=$extralibs" >> config.mak
+version=`grep '#define FFMPEG_VERSION ' "$source_path/libavcodec/avcodec.h" | + cut -d '"' -f 2` +echo "VERSION=$version" >>config.mak +# If you do not want to use encoders, disable them. +echo "#define CONFIG_ENCODERS 1" >> $TMPH +echo "CONFIG_ENCODERS=yes" >> config.mak + +# If you do not want to use decoders, disable them. +echo "#define CONFIG_DECODERS 1" >> $TMPH +echo "CONFIG_DECODERS=yes" >> config.mak + +# muxers +if test "$muxers" = "yes" ; then + echo "#define CONFIG_MUXERS 1" >> $TMPH + echo "CONFIG_MUXERS=yes" >> config.mak +fi + +# demuxers +if test "$demuxers" = "yes" ; then + echo "#define CONFIG_DEMUXERS 1" >> $TMPH + echo "CONFIG_DEMUXERS=yes" >> config.mak +fi + +# AC3 +if test "$a52" = "yes" ; then + echo "#define CONFIG_AC3 1" >> $TMPH + echo "CONFIG_AC3=yes" >> config.mak + + if test "$a52bin" = "yes" ; then + echo "#define CONFIG_A52BIN 1" >> $TMPH + echo "CONFIG_A52BIN=yes" >> config.mak + fi +fi + +# DTS +if test "$dts" = "yes" ; then + echo "#define CONFIG_DTS 1" >> $TMPH + echo "CONFIG_DTS=yes" >> config.mak +fi + +# PP +if test "$pp" = "yes" ; then + echo "#define CONFIG_PP 1" >> $TMPH + echo "CONFIG_PP=yes" >> config.mak +fi + +# MPEG audio high precision mode +if test "$mpegaudio_hp" = "yes" ; then + echo "#define CONFIG_MPEGAUDIO_HP 1" >> $TMPH +fi + +if test "$v4l" = "yes" ; then + echo "#define CONFIG_VIDEO4LINUX 1" >> $TMPH + echo "CONFIG_VIDEO4LINUX=yes" >> config.mak +fi + +if test "$v4l2" = "yes" ; then + echo "#define CONFIG_VIDEO4LINUX2 1" >> $TMPH + echo "CONFIG_VIDEO4LINUX2=yes" >> config.mak +fi + +if test "$bktr" = "yes" ; then + echo "#define CONFIG_BKTR 1" >> $TMPH + echo "CONFIG_BKTR=yes" >> config.mak +fi + +if test "$dv1394" = "yes" ; then + echo "#define CONFIG_DV1394 1" >> $TMPH + echo "CONFIG_DV1394=yes" >> config.mak +fi + +if test "$dc1394" = "yes" ; then + echo "#define CONFIG_DC1394 1" >> $TMPH + echo "CONFIG_DC1394=yes" >> config.mak +fi + +if test "$dlopen" = "yes" ; then + echo "#define CONFIG_HAVE_DLOPEN 1" 
>> $TMPH +fi + +if test "$dlfcn" = "yes" ; then + echo "#define CONFIG_HAVE_DLFCN 1" >> $TMPH +fi + +if test "$audio_oss" = "yes" ; then + echo "#define CONFIG_AUDIO_OSS 1" >> $TMPH + echo "CONFIG_AUDIO_OSS=yes" >> config.mak +fi + +if test "$audio_beos" = "yes" ; then + echo "#define CONFIG_AUDIO_BEOS 1" >> $TMPH + echo "CONFIG_AUDIO_BEOS=yes" >> config.mak +fi + +if test "$network" = "yes" ; then + echo "#define CONFIG_NETWORK 1" >> $TMPH + echo "CONFIG_NETWORK=yes" >> config.mak +fi + +if test "$ipv6" = "yes" ; then + echo "#define CONFIG_IPV6 1" >> $TMPH +fi + +if test "$zlib" = "yes" ; then + echo "#define CONFIG_ZLIB 1" >> $TMPH + echo "CONFIG_ZLIB=yes" >> config.mak +fi + +if test "$lzo" = "yes" ; then + echo "#define CONFIG_LZO 1" >> $TMPH + echo "CONFIG_LZO=yes" >> config.mak +fi + +if test "$libgsm" = "yes" ; then + echo "#define CONFIG_LIBGSM 1" >> $TMPH + echo "CONFIG_LIBGSM=yes" >> config.mak +fi + +if test "$mp3lame" = "yes" ; then + echo "#define CONFIG_MP3LAME 1" >> $TMPH + echo "CONFIG_MP3LAME=yes" >> config.mak +fi + +if test "$libogg" = "yes" ; then + echo "#define CONFIG_LIBOGG 1" >> $TMPH + echo "CONFIG_LIBOGG=yes" >> config.mak +fi + +if test "$vorbis" = "yes" ; then + echo "#define CONFIG_LIBVORBIS 1" >> $TMPH + echo "CONFIG_LIBVORBIS=yes" >> config.mak +fi + +if test "$theora" = "yes" ; then + echo "#define CONFIG_LIBTHEORA 1" >> $TMPH + echo "CONFIG_LIBTHEORA=yes" >> config.mak +fi + +if test "$faad" = "yes" ; then + echo "#define CONFIG_FAAD 1" >> $TMPH + echo "CONFIG_FAAD=yes" >> config.mak +fi + +if test "$faadbin" = "yes" ; then + echo "#define CONFIG_FAADBIN 1" >> $TMPH + echo "CONFIG_FAADBIN=yes" >> config.mak +fi + +if test "$faac" = "yes" ; then + echo "#define CONFIG_FAAC 1" >> $TMPH + echo "CONFIG_FAAC=yes" >> config.mak +fi + +if test "$xvid" = "yes" ; then + echo "#define CONFIG_XVID 1" >> $TMPH + echo "CONFIG_XVID=yes" >> config.mak +fi + +if test "$x264" = "yes" ; then + echo "#define CONFIG_X264 1" >> $TMPH + echo 
"CONFIG_X264=yes" >> config.mak +fi + +if test "$mingw32" = "yes" ; then + echo "#define CONFIG_WIN32 1" >> $TMPH + echo "CONFIG_WIN32=yes" >> config.mak + echo "HAVE_W32THREADS=yes" >> config.mak + echo "#define HAVE_W32THREADS 1" >> $TMPH + echo "#define HAVE_THREADS 1" >> $TMPH + echo "#ifndef __MINGW32__" >> $TMPH + echo "#define __MINGW32__ 1" >> $TMPH + echo "#endif" >> $TMPH +fi + +if test "$mingwce" = "yes" ; then + echo "#define CONFIG_WIN32 1" >> $TMPH + echo "CONFIG_WIN32=yes" >> config.mak + echo "#define CONFIG_WINCE 1" >> $TMPH + echo "CONFIG_WINCE=yes" >> config.mak + echo "#ifndef __MINGW32__" >> $TMPH + echo "#define __MINGW32__ 1" >> $TMPH + echo "#endif" >> $TMPH +fi + +if test "$os2" = "yes" ; then + echo "#define CONFIG_OS2 1" >> $TMPH + echo "CONFIG_OS2=yes" >> config.mak + echo "HAVE_OS2THREADS=yes" >> config.mak + echo "#define HAVE_OS2THREADS 1" >> $TMPH + echo "#define HAVE_THREADS 1" >> $TMPH +fi + +if test "$TARGET_OS" = "SunOS" ; then + echo "#define CONFIG_SUNOS 1" >> $TMPH +fi + +if test "$TARGET_OS" = "BeOS" ; then + echo "HAVE_BEOSTHREADS=yes" >> config.mak + echo "#define HAVE_BEOSTHREADS 1" >> $TMPH + echo "#define HAVE_THREADS 1" >> $TMPH +fi + +if test "$darwin" = "yes"; then + echo "#define CONFIG_DARWIN 1" >> $TMPH + echo "CONFIG_DARWIN=yes" >> config.mak +fi + +if test "$_malloc_h" = "yes" ; then + echo "#define HAVE_MALLOC_H 1" >> $TMPH +else + echo "#undef HAVE_MALLOC_H" >> $TMPH +fi + +if test "$_memalign" = "yes" ; then + echo "#define HAVE_MEMALIGN 1" >> $TMPH +else + echo "#undef HAVE_MEMALIGN" >> $TMPH +fi + +if test "$memalignhack" = "yes" ; then + echo "#define MEMALIGN_HACK 1" >> $TMPH +fi + + +if test "$netserver" = "yes" ; then + echo "#define CONFIG_BEOS_NETSERVER 1" >> $TMPH + echo "CONFIG_BEOS_NETSERVER=yes" >> config.mak +fi + +if test "$need_inet_aton" = "yes" ; then + echo "NEED_INET_ATON=yes" >> config.mak +fi + +if test "$simpleidct" = "yes" ; then + echo "#define SIMPLE_IDCT 1" >> $TMPH +fi + +if test 
"$protocols" = "yes" ; then + echo "#define CONFIG_PROTOCOLS 1" >> $TMPH + echo "CONFIG_PROTOCOLS=yes" >> config.mak +fi + +if test "$ffserver" = "yes" ; then + echo "#define CONFIG_FFSERVER 1" >> $TMPH + echo "CONFIG_FFSERVER=yes" >> config.mak +fi + +if test "$ffplay" = "yes" ; then + echo "CONFIG_FFPLAY=yes" >> config.mak +fi + +if test "$gpl" = "yes" ; then + echo "#define CONFIG_GPL 1" >> $TMPH + echo "CONFIG_GPL=yes" >> config.mak +fi + +echo "#define restrict $_restrict" >> $TMPH + +if test "$optimize" = "small"; then + echo "#define always_inline" >> $TMPH +fi + +# build tree in object directory if source path is different from current one +if test "$source_path_used" = "yes" ; then + DIRS="\ + doc \ + libavformat \ + libavcodec \ + libavcodec/alpha \ + libavcodec/armv4l \ + libavcodec/i386 \ + libavcodec/sparc \ + libavcodec/mlib \ + libavcodec/ppc \ + libavcodec/liba52 \ + libavcodec/libpostproc \ + libavutil \ + tests \ + vhook \ + " + FILES="\ + Makefile \ + libavformat/Makefile \ + libavcodec/Makefile \ + libavcodec/libpostproc/Makefile \ + libavutil/Makefile \ + tests/Makefile \ + vhook/Makefile \ + doc/Makefile \ + doc/texi2pod.pl \ + " + for dir in $DIRS ; do + mkdir -p $dir + done + for f in $FILES ; do + ln -sf "$source_path/$f" $f + done +fi +echo "SRC_PATH=$source_path" >> config.mak +echo "BUILD_ROOT=$PWD" >> config.mak + +if test "$amr_wb" = "yes" ; then + echo "#define AMR_WB 1" >> $TMPH + echo "AMR_WB=yes" >> config.mak + echo + echo "AMR WB FLOAT NOTICE ! Make sure you have downloaded TS26.204" + echo "V5.1.0 from " + echo "http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-510.zip" + echo "and extracted the source to libavcodec/amrwb_float" + echo +fi + +if test "$amr_nb" = "yes" ; then + echo "#define AMR_NB 1" >> $TMPH + echo "AMR_NB=yes" >> config.mak + echo +if test "$amr_nb_fixed" = "yes" ; then + echo "AMR_NB_FIXED=yes" >> config.mak + echo "#define AMR_NB_FIXED 1" >> $TMPH + echo + echo "AMR NB FIXED POINT NOTICE! 
Make sure you have downloaded TS26.073 " + echo "REL-5 version 5.1.0 from " + echo "http://www.3gpp.org/ftp/Specs/latest/Rel-5/26_series/26073-5??.zip" + echo "and extracted src to libavcodec/amr" + echo "You must also add -DMMS_IO and remove -pedantic-errors to/from CFLAGS in libavcodec/amr/makefile." + echo "i.e. CFLAGS = -Wall -I. \$(CFLAGS_\$(MODE)) -D\$(VAD) -DMMS_IO" + echo +else + echo "AMR NB FLOAT NOTICE ! Make sure you have downloaded TS26.104" + echo "REL-5 V5.1.0 from " + echo "http://www.3gpp.org/ftp/Specs/latest/Rel-5/26_series/26104-5??.zip" + echo "and extracted the source to libavcodec/amr_float" + echo "If you try this on alpha, you may need to change Word32 to int in amr/typedef.h" + echo +fi + +if test "$amr_if2" = "yes" ; then + echo "AMR_CFLAGS=-DIF2=1" >> config.mak +fi + +fi + +for codec in $CODEC_LIST ; do + echo "#define CONFIG_`echo $codec | tr a-z A-Z` 1" >> $TMPH + echo "CONFIG_`echo $codec | tr a-z A-Z`=yes" >> config.mak +done + +# Do not overwrite config.h if unchanged to avoid superfluous rebuilds. +diff $TMPH config.h >/dev/null 2>&1 +if test "$?" 
!= "0" ; then + mv -f $TMPH config.h +else + echo "config.h is unchanged" +fi + +rm -f $TMPO $TMPC $TMPE $TMPS $TMPH + + +# build pkg-config files libav*.pc and libpostproc.pc +# libavutil.pc +cat <libavutil.pc +prefix=$prefix +exec_prefix=\${prefix} +libdir=\${exec_prefix}/lib +includedir=\${prefix}/include + +Name: libavutil +Description: FFmpeg utility library +Version: $lavu_version +Requires: +Conflicts: +Libs: -L\${libdir} -lavutil +Cflags: -I\${includedir} -I\${includedir}/ffmpeg +EOF + +cat <libavutil-uninstalled.pc +prefix= +exec_prefix= +libdir=\${pcfiledir}/libavutil +includedir=\${pcfiledir}/libavutil + +Name: libavutil +Description: FFmpeg utility library +Version: $lavu_version +Requires: +Conflicts: +Libs: \${libdir}/${LIBPREF}avutil${LIBSUF} +Cflags: -I\${includedir} +EOF + +# libavcodec.pc +cat <libavcodec.pc +prefix=$prefix +exec_prefix=\${prefix} +libdir=\${exec_prefix}/lib +includedir=\${prefix}/include + +Name: libavcodec +Description: FFmpeg codec library +Version: $lavc_version +Requires: $pkg_requires libavutil = $lavu_version +Conflicts: +Libs: -L\${libdir} -lavcodec $extralibs +Cflags: -I\${includedir} -I\${includedir}/ffmpeg +EOF + +cat <libavcodec-uninstalled.pc +prefix= +exec_prefix= +libdir=\${pcfiledir}/libavcodec +includedir=\${pcfiledir}/libavcodec + +Name: libavcodec +Description: FFmpeg codec library +Version: $lavc_version +Requires: $pkg_requires libavutil = $lavu_version +Conflicts: +Libs: \${libdir}/${LIBPREF}avcodec${LIBSUF} $extralibs +Cflags: -I\${includedir} +EOF + +# libavformat.pc +cat <libavformat.pc +prefix=$prefix +exec_prefix=\${prefix} +libdir=\${exec_prefix}/lib +includedir=\${prefix}/include + +Name: libavformat +Description: FFmpeg container format library +Version: $lavf_version +Requires: $pkg_requires libavcodec = $lavc_version +Conflicts: +Libs: -L\${libdir} -lavformat $extralibs +Cflags: -I\${includedir} -I\${includedir}/ffmpeg +EOF + +cat <libavformat-uninstalled.pc +prefix= +exec_prefix= 
+libdir=\${pcfiledir}/libavformat +includedir=\${pcfiledir}/libavformat + +Name: libavformat +Description: FFmpeg container format library +Version: $lavf_version +Requires: $pkg_requires libavcodec = $lavc_version +Conflicts: +Libs: \${libdir}/${LIBPREF}avformat${LIBSUF} $extralibs +Cflags: -I\${includedir} +EOF + + +# libpostproc.pc +cat <libpostproc.pc +prefix=$prefix +exec_prefix=\${prefix} +libdir=\${exec_prefix}/lib +includedir=\${prefix}/include + +Name: libpostproc +Description: FFmpeg post processing library +Version: $lavc_version +Requires: +Conflicts: +Libs: -L\${libdir} -lpostproc +Cflags: -I\${includedir} -I\${includedir}/postproc +EOF + +cat <libpostproc-uninstalled.pc +prefix= +exec_prefix= +libdir=\${pcfiledir}/libavcodec/libpostproc +includedir=\${pcfiledir}/libavcodec/libpostproc + +Name: libpostproc +Description: FFmpeg post processing library +Version: $lavc_version +Requires: +Conflicts: +Libs: \${libdir}/${LIBPREF}postproc${LIBSUF} +Cflags: -I\${includedir} +EOF diff --git a/mpeg4/src/cws2fws.c b/mpeg4/src/cws2fws.c new file mode 100644 index 0000000000000000000000000000000000000000..eb02f58171924cc5b6cff5251751caac28a0830f --- /dev/null +++ b/mpeg4/src/cws2fws.c @@ -0,0 +1,126 @@ +/* + * cws2fws by Alex Beregszaszi + * + * This utility converts compressed Macromedia Flash files to uncompressed ones. 
+ * + */ + +#include +#include +#include +#include + +#ifdef DEBUG +#define dbgprintf printf +#else +#define dbgprintf +#endif + +main(int argc, char *argv[]) +{ + int fd_in, fd_out, comp_len, uncomp_len, tag, i, last_out; + char buf_in[1024], buf_out[65536]; + z_stream zstream; + struct stat statbuf; + + if (argc < 3) + { + printf("Usage: %s \n", argv[0]); + exit(1); + } + + fd_in = open(argv[1], O_RDONLY); + if (fd_in < 0) + { + perror("Error while opening: "); + exit(1); + } + + fd_out = open(argv[2], O_WRONLY|O_CREAT, 00644); + if (fd_out < 0) + { + perror("Error while opening: "); + close(fd_in); + exit(1); + } + + if (read(fd_in, &buf_in, 8) != 8) + { + printf("Header error\n"); + close(fd_in); + close(fd_out); + exit(1); + } + + if (buf_in[0] != 'C' || buf_in[1] != 'W' || buf_in[2] != 'S') + { + printf("Not a compressed flash file\n"); + exit(1); + } + + fstat(fd_in, &statbuf); + comp_len = statbuf.st_size; + uncomp_len = buf_in[4] | (buf_in[5] << 8) | (buf_in[6] << 16) | (buf_in[7] << 24); + + printf("Compressed size: %d Uncompressed size: %d\n", comp_len-4, uncomp_len-4); + + // write out modified header + buf_in[0] = 'F'; + write(fd_out, &buf_in, 8); + + zstream.zalloc = NULL; + zstream.zfree = NULL; + zstream.opaque = NULL; + inflateInit(&zstream); + + for (i = 0; i < comp_len-8;) + { + int ret, len = read(fd_in, &buf_in, 1024); + + dbgprintf("read %d bytes\n", len); + + last_out = zstream.total_out; + + zstream.next_in = &buf_in[0]; + zstream.avail_in = len; + zstream.next_out = &buf_out[0]; + zstream.avail_out = 65536; + + ret = inflate(&zstream, Z_SYNC_FLUSH); + if (ret != Z_STREAM_END && ret != Z_OK) + { + printf("Error while decompressing: %d\n", ret); + inflateEnd(&zstream); + exit(1); + } + + dbgprintf("a_in: %d t_in: %d a_out: %d t_out: %d -- %d out\n", + zstream.avail_in, zstream.total_in, zstream.avail_out, zstream.total_out, + zstream.total_out-last_out); + + write(fd_out, &buf_out, zstream.total_out-last_out); + + i += len; + + if (ret == 
Z_STREAM_END || ret == Z_BUF_ERROR) + break; + } + + if (zstream.total_out != uncomp_len-8) + { + printf("Size mismatch (%d != %d), updating header...\n", + zstream.total_out, uncomp_len-8); + + buf_in[0] = (zstream.total_out+8) & 0xff; + buf_in[1] = (zstream.total_out+8 >> 8) & 0xff; + buf_in[2] = (zstream.total_out+8 >> 16) & 0xff; + buf_in[3] = (zstream.total_out+8 >> 24) & 0xff; + + lseek(fd_out, 4, SEEK_SET); + write(fd_out, &buf_in, 4); + } + + inflateEnd(&zstream); + close(fd_in); + close(fd_out); +} diff --git a/mpeg4/src/doc/.cvsignore b/mpeg4/src/doc/.cvsignore new file mode 100644 index 0000000000000000000000000000000000000000..39bd10002fde8b137ce6f94412b7f96363ea0912 --- /dev/null +++ b/mpeg4/src/doc/.cvsignore @@ -0,0 +1,8 @@ +faq.html +ffmpeg-doc.html +ffmpeg.1 +ffplay-doc.html +ffplay.1 +ffserver-doc.html +ffserver.1 +hooks.html diff --git a/mpeg4/src/doc/Makefile b/mpeg4/src/doc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..744a9fbc18dcec1a9f0a3046f462c651b75f00a8 --- /dev/null +++ b/mpeg4/src/doc/Makefile @@ -0,0 +1,18 @@ +-include ../config.mak + +VPATH=$(SRC_PATH)/doc + +all: ffmpeg-doc.html faq.html ffserver-doc.html ffplay-doc.html hooks.html \ + ffmpeg.1 ffserver.1 ffplay.1 + +%.html: %.texi Makefile + texi2html -monolithic -number $< + +%.pod: %-doc.texi + ./texi2pod.pl $< $@ + +%.1: %.pod + pod2man --section=1 --center=" " --release=" " $< > $@ + +clean: + rm -f *.html *.pod *.1 diff --git a/mpeg4/src/doc/TODO b/mpeg4/src/doc/TODO new file mode 100644 index 0000000000000000000000000000000000000000..ed0b637717bab569efd6873d6d11514a327c914a --- /dev/null +++ b/mpeg4/src/doc/TODO @@ -0,0 +1,74 @@ +ffmpeg TODO list: +---------------- + +Fabrice's TODO list: (unordered) +------------------- +Short term: + +- av_read_frame() API +- seeking API and example in ffplay +- parse_only mode +- use AVFMTCTX_DISCARD_PKT in ffplay so that DV has a chance to work +- add RTSP regression test (both client and server) +- make 
ffserver allocate AVFormatContext +- clean up (incompatible change, for 0.5.0): + * AVStream -> AVComponent + * AVFormatContext -> AVInputStream/AVOutputStream + * suppress rate_emu from AVCodecContext +- add av_log() in libavcodec and suppress all printf +- add new float/integer audio filtering and conversion : suppress + CODEC_ID_PCM_xxc and use CODEC_ID_RAWAUDIO. +- fix telecine and frame rate conversion + +Long term (ask me if you want to help): + +- commit new imgconvert API and new PIX_FMT_xxx alpha formats +- commit new LGPL'ed float and integer-only AC3 decoder +- add WMA integer-only decoder +- integrate custom Vorbis Tremor based integer and float decoder +- add new MPEG4-AAC audio decoder (both integer-only and float version) + +Michael's TODO list: (unordered) (if anyone wanna help with sth, just ask) +------------------- +- finish implementation of WMV2 j-picture +- H264 encoder +- H264 B frames +- optimize H264 CABAC +- per MB ratecontrol (so VCD and such do work better) +- more optimizations +- simpler rate control +- split h263.c +- reverse engineer RV30/RV40 +- finish NUT implementation + +Francois' TODO list: (unordered, without any timeframe) +------------------- +- test MACE decoder against the openquicktime one as suggested by A'rpi +- mov/mp4 muxer +- BeOS audio input grabbing backend +- BeOS video input grabbing backend +- have a REAL BeOS errno fix (return MKERROR(EXXX);), not a hack +- publish my BeOS libposix on BeBits so I can officially support ffserver :) +- check the whole code for thread-safety (global and init stuff) + +Philip's TODO list: (alphabetically ordered) (please help) +------------------ +- Add a multi-ffm filetype so that feeds can be recorded into multiple files rather + than one big file.
+- Authenticated users support -- where the authentication is in the URL +- Change ASF files so that the embedded timestamp in the frames is right rather + than being an offset from the start of the stream +- Make ffm files more resilient to changes in the codec structures so that you + can play old ffm files. + +unassigned TODO: (stuff which should/must be done but no one seems to do it) +--------------- +- use AVFrame for audio codecs too +- H264 interlacing +- rework aviobuf.c buffering strategy and fix url_fskip +- do audio resampling with bandlimited interpolation: + http://ccrma-www.stanford.edu/~jos/resample/resample.html +- add seeking support to the command line +- generate optimal huffman tables for mjpeg encoding +- fix ffserver regression tests + diff --git a/mpeg4/src/doc/faq.texi b/mpeg4/src/doc/faq.texi new file mode 100644 index 0000000000000000000000000000000000000000..ec022d5bcb47c0b95dc9c28c52b669d452abc537 --- /dev/null +++ b/mpeg4/src/doc/faq.texi @@ -0,0 +1,220 @@ +\input texinfo @c -*- texinfo -*- + +@settitle FFmpeg FAQ +@titlepage +@sp 7 +@center @titlefont{FFmpeg FAQ} +@sp 3 +@end titlepage + + +@chapter General Problems + +@section I cannot read this file although this format seems to be supported by ffmpeg. + +Even if ffmpeg can read the file format, it may not support all its +codecs. Please consult the supported codec list in the ffmpeg +documentation. + +@section How do I encode JPEGs to another format ? + +If the JPEGs are named img1.jpg, img2.jpg, img3.jpg,..., use: + +@example + ffmpeg -i img%d.jpg /tmp/a.mpg +@end example + +@samp{%d} is replaced by the image number. + +@file{img%03d.jpg} generates @file{img001.jpg}, @file{img002.jpg}, etc... + +The same system is used for the other image formats. + +@section FFmpeg does not support codec XXX. Can you include a Windows DLL loader to support it ? + +No. FFmpeg only supports open source codecs. Windows DLLs are not +portable, bloated and often slow.
+ +@section Why do I see a slight quality degradation with multithreaded MPEG* encoding ? + +For multithreaded MPEG* encoding, the encoded slices must be independent, +otherwise thread n would practically have to wait for n-1 to finish, so it's +quite logical that there is a small reduction of quality. This is not a bug. + +@section How can I read from the standard input or write to the standard output ? + +Use @file{-} as filename. + +@section Why does ffmpeg not decode audio in VOB files ? + +The audio is AC3 (a.k.a. A/52). AC3 decoding is an optional component in ffmpeg +as the component that handles AC3 decoding (liba52) is currently released under +the GPL. If you have liba52 installed on your system, enable AC3 decoding +with @code{./configure --enable-a52}. Take care: by +enabling AC3, you automatically change the license of libavcodec from +LGPL to GPL. + +@section Which codecs are supported by Windows ? + +Windows does not support standard formats like MPEG very well, unless you +install some additional codecs + +The following list of video codecs should work on most Windows systems: +@table @option +@item msmpeg4v2 +.avi/.asf +@item msmpeg4 +.asf only +@item wmv1 +.asf only +@item wmv2 +.asf only +@item mpeg4 +only if you have some MPEG-4 codec installed like ffdshow or XviD +@item mpeg1 +.mpg only +@end table +Note, ASF files often have .wmv or .wma extensions in Windows. It should also +be mentioned that Microsoft claims a patent on the ASF format, and may sue +or threaten users who create ASF files with non-Microsoft software. It is +strongly advised to avoid ASF where possible. + +The following list of audio codecs should work on most Windows systems: +@table @option +@item adpcm_ima_wav +@item adpcm_ms +@item pcm +@item mp3 +if some MP3 codec like LAME is installed +@end table + +@section Why does the chrominance data seem to be sampled at a different time from the luminance data on bt8x8 captures on Linux? 
+ +This is a well-known bug in the bt8x8 driver. For 2.4.26 there is a patch at +(@url{http://mplayerhq.hu/~michael/bttv-420-2.4.26.patch}). This may also +apply cleanly to other 2.4-series kernels. + +@section How do I avoid the ugly aliasing artifacts in bt8x8 captures on Linux? + +Pass 'combfilter=1 lumafilter=1' to the bttv driver. Note though that 'combfilter=1' +will cause somewhat too strong filtering. A fix is to apply (@url{http://mplayerhq.hu/~michael/bttv-comb-2.4.26.patch}) +or (@url{http://mplayerhq.hu/~michael/bttv-comb-2.6.6.patch}) +and pass 'combfilter=2'. + +@section I have a problem with an old version of ffmpeg; where should I report it? +Nowhere. Upgrade to the latest release or if there is no recent release upgrade +to CVS. You could also try to report it. Maybe you will get lucky and +become the first person in history to get an answer different from "upgrade +to CVS". + +@section -f jpeg doesn't work. + +Try '-f image -img jpeg test%d.jpg'. + +@section Why can I not change the framerate? + +Some codecs, like MPEG-1/2, only allow a small number of fixed framerates. +Choose a different codec with the -vcodec command line option. + +@section ffmpeg does not work; What is wrong? + +Try a 'make distclean' in the ffmpeg source directory. If this does not help see +(@url{http://ffmpeg.org/bugreports.php}). + +@section How do I encode XviD or DivX video with ffmpeg? + +Both XviD and DivX (version 4+) are implementations of the ISO MPEG-4 +standard (note that there are many other coding formats that use this +same standard). Thus, use '-vcodec mpeg4' to encode these formats. The +default fourcc stored in an MPEG-4-coded file will be 'FMP4'. If you want +a different fourcc, use the '-vtag' option. E.g., '-vtag xvid' will +force the fourcc 'xvid' to be stored as the video fourcc rather than the +default. + +@chapter Development + +@section When will the next FFmpeg version be released? / Why are FFmpeg releases so few and far between? 
+ +Like most open source projects FFmpeg suffers from a certain lack of +manpower. For this reason the developers have to prioritize the work +they do and putting out releases is not at the top of the list, fixing +bugs and reviewing patches takes precedence. Please don't complain or +request more timely and/or frequent releases unless you are willing to +help out creating them. + +@section Why doesn't FFmpeg support feature [xyz]? + +Because no one has taken on that task yet. FFmpeg development is +driven by the tasks that are important to the individual developers. +If there is a feature that is important to you, the best way to get +it implemented is to undertake the task yourself. + + +@section Are there examples illustrating how to use the FFmpeg libraries, particularly libavcodec and libavformat ? + +Yes. Read the Developers Guide of the FFmpeg documentation. Alternatively, +examine the source code for one of the many open source projects that +already incorporate ffmpeg at (@url{projects.php}). + +@section Can you support my C compiler XXX ? + +No. Only GCC is supported. GCC is ported to most systems available and there +is no need to pollute the source code with @code{#ifdef}s +related to the compiler. + +@section Can I use FFmpeg or libavcodec under Windows ? + +Yes, but the MinGW tools @emph{must} be used to compile FFmpeg. You +can link the resulting DLLs with any other Windows program. Read the +@emph{Native Windows Compilation} and @emph{Visual C++ compatibility} +sections in the FFmpeg documentation to find more information. + +@section Can you add automake, libtool or autoconf support ? + +No. These tools are too bloated and they complicate the build. Moreover, +since only @samp{gcc} is supported they would add little advantages in +terms of portability. + +@section Why not rewrite ffmpeg in object-oriented C++ ? + +ffmpeg is already organized in a highly modular manner and does not need to +be rewritten in a formal object language. 
Further, many of the developers +favor straight C; it works for them. For more arguments on this matter, +read "Programming Religion" at (@url{http://lkml.org/faq/lkmlfaq-15.html}). + +@section Why are the ffmpeg programs devoid of debugging symbols ? + +The build process creates ffmpeg_g, ffplay_g, etc. which contain full debug +information. Those binaries are strip'd to create ffmpeg, ffplay, etc. If +you need the debug information, use the *_g versions. + +@section I do not like the LGPL, can I contribute code under the GPL instead ? + +Yes, as long as the code is optional and can easily and cleanly be placed +under #ifdef CONFIG_GPL without breaking anything. So for example a new codec +or filter would be OK under GPL while a bugfix to LGPL code would not. + +@section I want to compile xyz.c alone but my compiler produced many errors. + +Common code is in its own files in libav* and is used by the individual +codecs. They will not work without the common parts; you have to compile +the whole libav*. If you wish, disable some parts with configure switches. +You can also try to hack it and remove more, but if you had problems fixing +the compilation failure then you are probably not qualified for this. + +@section Visual C++ produces many errors. + +Visual C++ is not compliant with the C standard and does not support +the inline assembly used in FFmpeg. +If you wish - for whatever weird reason - to use Visual C++ for your +project then you can link the Visual C++ code with libav* as long as +you compile the latter with a working C compiler. For more information, see +the @emph{Visual C++ compatibility} section in the FFmpeg documentation. + +There have been efforts to make FFmpeg compatible with Visual C++ in the +past. However, they have all been rejected as too intrusive, especially +since MinGW does the job perfectly adequately. None of the core developers +work with Visual C++ and thus this item is low priority.
Should you find +the silver bullet that solves this problem, feel free to shoot it at us. + +@bye diff --git a/mpeg4/src/doc/ffmpeg-doc.texi b/mpeg4/src/doc/ffmpeg-doc.texi new file mode 100644 index 0000000000000000000000000000000000000000..cfa1488006320f58f70b15b51fb9172b7d6a07fe --- /dev/null +++ b/mpeg4/src/doc/ffmpeg-doc.texi @@ -0,0 +1,1370 @@ +\input texinfo @c -*- texinfo -*- + +@settitle FFmpeg Documentation +@titlepage +@sp 7 +@center @titlefont{FFmpeg Documentation} +@sp 3 +@end titlepage + + +@chapter Introduction + +FFmpeg is a very fast video and audio converter. It can also grab from +a live audio/video source. + +The command line interface is designed to be intuitive, in the sense +that FFmpeg tries to figure out all parameters that can possibly be +derived automatically. You usually only have to specify the target +bitrate you want. + +FFmpeg can also convert from any sample rate to any other, and resize +video on the fly with a high quality polyphase filter. + +@chapter Quick Start + +@c man begin EXAMPLES +@section Video and Audio grabbing + +FFmpeg can use a video4linux compatible video source and any Open Sound +System audio source: + +@example +ffmpeg /tmp/out.mpg +@end example + +Note that you must activate the right video source and channel before +launching FFmpeg with any TV viewer such as xawtv +(@url{http://bytesex.org/xawtv/}) by Gerd Knorr. You also +have to set the audio recording levels correctly with a +standard mixer. + +@section Video and Audio file format conversion + +* FFmpeg can use any supported file format and protocol as input: + +Examples: + +* You can use YUV files as input: + +@example +ffmpeg -i /tmp/test%d.Y /tmp/out.mpg +@end example + +It will use the files: +@example +/tmp/test0.Y, /tmp/test0.U, /tmp/test0.V, +/tmp/test1.Y, /tmp/test1.U, /tmp/test1.V, etc... +@end example + +The Y files use twice the resolution of the U and V files. They are +raw files, without header. 
They can be generated by all decent video +decoders. You must specify the size of the image with the @option{-s} option +if FFmpeg cannot guess it. + +* You can input from a raw YUV420P file: + +@example +ffmpeg -i /tmp/test.yuv /tmp/out.avi +@end example + +test.yuv is a file containing raw YUV planar data. Each frame is composed +of the Y plane followed by the U and V planes at half vertical and +horizontal resolution. + +* You can output to a raw YUV420P file: + +@example +ffmpeg -i mydivx.avi hugefile.yuv +@end example + +* You can set several input files and output files: + +@example +ffmpeg -i /tmp/a.wav -s 640x480 -i /tmp/a.yuv /tmp/a.mpg +@end example + +Converts the audio file a.wav and the raw YUV video file a.yuv +to MPEG file a.mpg. + +* You can also do audio and video conversions at the same time: + +@example +ffmpeg -i /tmp/a.wav -ar 22050 /tmp/a.mp2 +@end example + +Converts a.wav to MPEG audio at 22050Hz sample rate. + +* You can encode to several formats at the same time and define a +mapping from input stream to output streams: + +@example +ffmpeg -i /tmp/a.wav -ab 64 /tmp/a.mp2 -ab 128 /tmp/b.mp2 -map 0:0 -map 0:0 +@end example + +Converts a.wav to a.mp2 at 64 kbits and to b.mp2 at 128 kbits. '-map +file:index' specifies which input stream is used for each output +stream, in the order of the definition of output streams. + +* You can transcode decrypted VOBs + +@example +ffmpeg -i snatch_1.vob -f avi -vcodec mpeg4 -b 800 -g 300 -bf 2 -acodec mp3 -ab 128 snatch.avi +@end example + +This is a typical DVD ripping example; the input is a VOB file, the +output an AVI file with MPEG-4 video and MP3 audio. Note that in this +command we use B-frames so the MPEG-4 stream is DivX5 compatible, and +GOP size is 300 which means one intra frame every 10 seconds for 29.97fps +input video. Furthermore, the audio stream is MP3-encoded so you need +to enable LAME support by passing @code{--enable-mp3lame} to configure. 
+The mapping is particularly useful for DVD transcoding +to get the desired audio language. + +NOTE: To see the supported input formats, use @code{ffmpeg -formats}. +@c man end + +@chapter Invocation + +@section Syntax + +The generic syntax is: + +@example +@c man begin SYNOPSIS +ffmpeg [[infile options][@option{-i} @var{infile}]]... @{[outfile options] @var{outfile}@}... +@c man end +@end example +@c man begin DESCRIPTION +If no input file is given, audio/video grabbing is done. + +As a general rule, options are applied to the next specified +file. For example, if you give the @option{-b 64} option, it sets the video +bitrate of the next file. The format option may be needed for raw input +files. + +By default, FFmpeg tries to convert as losslessly as possible: It +uses the same audio and video parameters for the outputs as the one +specified for the inputs. +@c man end + +@c man begin OPTIONS +@section Main options + +@table @option +@item -L +Show license. + +@item -h +Show help. + +@item -formats +Show available formats, codecs, protocols, ... + +@item -f fmt +Force format. + +@item -i filename +input filename + +@item -y +Overwrite output files. + +@item -t duration +Set the recording time in seconds. +@code{hh:mm:ss[.xxx]} syntax is also supported. + +@item -ss position +Seek to given time position in seconds. +@code{hh:mm:ss[.xxx]} syntax is also supported. + +@item -title string +Set the title. + +@item -author string +Set the author. + +@item -copyright string +Set the copyright. + +@item -comment string +Set the comment. + +@item -target type +Specify target file type ("vcd", "svcd", "dvd", "dv", "pal-vcd", +"ntsc-svcd", ... ). All the format options (bitrate, codecs, +buffer sizes) are then set automatically. 
You can just type: + +@example +ffmpeg -i myfile.avi -target vcd /tmp/vcd.mpg +@end example + +Nevertheless you can specify additional options as long as you know +they do not conflict with the standard, as in: + +@example +ffmpeg -i myfile.avi -target vcd -bf 2 /tmp/vcd.mpg +@end example + +@item -hq +Activate high quality settings. + +@item -itsoffset offset +Set the input time offset in seconds. +@code{[-]hh:mm:ss[.xxx]} syntax is also supported. +This option affects all the input files that follow it. +The offset is added to the timestamps of the input files. +Specifying a positive offset means that the corresponding +streams are delayed by 'offset' seconds. + +@end table + +@section Video Options + +@table @option +@item -b bitrate +Set the video bitrate in kbit/s (default = 200 kb/s). +@item -r fps +Set frame rate (default = 25). +@item -s size +Set frame size. The format is @samp{wxh} (default = 160x128). +The following abbreviations are recognized: +@table @samp +@item sqcif +128x96 +@item qcif +176x144 +@item cif +352x288 +@item 4cif +704x576 +@end table + +@item -aspect aspect +Set aspect ratio (4:3, 16:9 or 1.3333, 1.7777). +@item -croptop size +Set top crop band size (in pixels). +@item -cropbottom size +Set bottom crop band size (in pixels). +@item -cropleft size +Set left crop band size (in pixels). +@item -cropright size +Set right crop band size (in pixels). +@item -padtop size +Set top pad band size (in pixels). +@item -padbottom size +Set bottom pad band size (in pixels). +@item -padleft size +Set left pad band size (in pixels). +@item -padright size +Set right pad band size (in pixels). +@item -padcolor (hex color) +Set color of padded bands. The value for padcolor is expressed +as a six digit hexadecimal number where the first two digits +represent red, the middle two digits green and last two digits +blue (default = 000000 (black)). +@item -vn +Disable video recording. +@item -bt tolerance +Set video bitrate tolerance (in kbit/s). 
+@item -maxrate bitrate +Set max video bitrate tolerance (in kbit/s). +@item -minrate bitrate +Set min video bitrate tolerance (in kbit/s). +@item -bufsize size +Set rate control buffer size (in kbit). +@item -vcodec codec +Force video codec to @var{codec}. Use the @code{copy} special value to +tell that the raw codec data must be copied as is. +@item -sameq +Use same video quality as source (implies VBR). + +@item -pass n +Select the pass number (1 or 2). It is useful to do two pass +encoding. The statistics of the video are recorded in the first +pass and the video is generated at the exact requested bitrate +in the second pass. + +@item -passlogfile file +Set two pass logfile name to @var{file}. + +@end table + +@section Advanced Video Options + +@table @option +@item -g gop_size +Set the group of pictures size. +@item -intra +Use only intra frames. +@item -qscale q +Use fixed video quantiser scale (VBR). +@item -qmin q +minimum video quantiser scale (VBR) +@item -qmax q +maximum video quantiser scale (VBR) +@item -qdiff q +maximum difference between the quantiser scales (VBR) +@item -qblur blur +video quantiser scale blur (VBR) +@item -qcomp compression +video quantiser scale compression (VBR) + +@item -rc_init_cplx complexity +initial complexity for single pass encoding +@item -b_qfactor factor +qp factor between P- and B-frames +@item -i_qfactor factor +qp factor between P- and I-frames +@item -b_qoffset offset +qp offset between P- and B-frames +@item -i_qoffset offset +qp offset between P- and I-frames +@item -rc_eq equation +Set rate control equation (@pxref{FFmpeg formula +evaluator}) (default = @code{tex^qComp}). +@item -rc_override override +rate control override for specific intervals +@item -me method +Set motion estimation method to @var{method}. +Available methods are (from lowest to best quality): +@table @samp +@item zero +Try just the (0, 0) vector. 
+@item phods +@item log +@item x1 +@item epzs +(default method) +@item full +exhaustive search (slow and marginally better than epzs) +@end table + +@item -dct_algo algo +Set DCT algorithm to @var{algo}. Available values are: +@table @samp +@item 0 +FF_DCT_AUTO (default) +@item 1 +FF_DCT_FASTINT +@item 2 +FF_DCT_INT +@item 3 +FF_DCT_MMX +@item 4 +FF_DCT_MLIB +@item 5 +FF_DCT_ALTIVEC +@end table + +@item -idct_algo algo +Set IDCT algorithm to @var{algo}. Available values are: +@table @samp +@item 0 +FF_IDCT_AUTO (default) +@item 1 +FF_IDCT_INT +@item 2 +FF_IDCT_SIMPLE +@item 3 +FF_IDCT_SIMPLEMMX +@item 4 +FF_IDCT_LIBMPEG2MMX +@item 5 +FF_IDCT_PS2 +@item 6 +FF_IDCT_MLIB +@item 7 +FF_IDCT_ARM +@item 8 +FF_IDCT_ALTIVEC +@item 9 +FF_IDCT_SH4 +@item 10 +FF_IDCT_SIMPLEARM +@end table + +@item -er n +Set error resilience to @var{n}. +@table @samp +@item 1 +FF_ER_CAREFUL (default) +@item 2 +FF_ER_COMPLIANT +@item 3 +FF_ER_AGGRESSIVE +@item 4 +FF_ER_VERY_AGGRESSIVE +@end table + +@item -ec bit_mask +Set error concealment to @var{bit_mask}. @var{bit_mask} is a bit mask of +the following values: +@table @samp +@item 1 +FF_EC_GUESS_MVS (default = enabled) +@item 2 +FF_EC_DEBLOCK (default = enabled) +@end table + +@item -bf frames +Use 'frames' B-frames (supported for MPEG-1, MPEG-2 and MPEG-4). +@item -mbd mode +macroblock decision +@table @samp +@item 0 +FF_MB_DECISION_SIMPLE: Use mb_cmp (cannot change it yet in FFmpeg). +@item 1 +FF_MB_DECISION_BITS: Choose the one which needs the fewest bits. +@item 2 +FF_MB_DECISION_RD: rate distortion +@end table + +@item -4mv +Use four motion vector by macroblock (MPEG-4 only). +@item -part +Use data partitioning (MPEG-4 only). +@item -bug param +Work around encoder bugs that are not auto-detected. +@item -strict strictness +How strictly to follow the standards. +@item -aic +Enable Advanced intra coding (h263+). +@item -umv +Enable Unlimited Motion Vector (h263+) + +@item -deinterlace +Deinterlace pictures. 
+@item -interlace +Force interlacing support in encoder (MPEG-2 and MPEG-4 only). +Use this option if your input file is interlaced and you want +to keep the interlaced format for minimum losses. +The alternative is to deinterlace the input stream with +@option{-deinterlace}, but deinterlacing introduces losses. +@item -psnr +Calculate PSNR of compressed frames. +@item -vstats +Dump video coding statistics to @file{vstats_HHMMSS.log}. +@item -vhook module +Insert video processing @var{module}. @var{module} contains the module +name and its parameters separated by spaces. +@end table + +@section Audio Options + +@table @option +@item -ar freq +Set the audio sampling frequency (default = 44100 Hz). +@item -ab bitrate +Set the audio bitrate in kbit/s (default = 64). +@item -ac channels +Set the number of audio channels (default = 1). +@item -an +Disable audio recording. +@item -acodec codec +Force audio codec to @var{codec}. Use the @code{copy} special value to +specify that the raw codec data must be copied as is. +@end table + +@section Audio/Video grab options + +@table @option +@item -vd device +Set video grab device (e.g. @file{/dev/video0}). +@item -vc channel +Set video grab channel (DV1394 only). +@item -tvstd standard +Set television standard (NTSC, PAL (SECAM)). +@item -dv1394 +Set DV1394 grab. +@item -ad device +Set audio device (e.g. @file{/dev/dsp}). +@end table + +@section Advanced options + +@table @option +@item -map file:stream +Set input stream mapping. +@item -debug +Print specific debug info. +@item -benchmark +Add timings for benchmarking. +@item -hex +Dump each input packet. +@item -bitexact +Only use bit exact algorithms (for codec testing). +@item -ps size +Set packet size in bits. +@item -re +Read input at native frame rate. Mainly used to simulate a grab device. +@item -loop +Loop over the input stream. Currently it works only for image +streams. This option is used for automatic FFserver testing. 
+@item -loop_output number_of_times +Repeatedly loop output for formats that support looping such as animated GIF +(0 will loop the output infinitely). +@end table + +@node FFmpeg formula evaluator +@section FFmpeg formula evaluator + +When evaluating a rate control string, FFmpeg uses an internal formula +evaluator. + +The following binary operators are available: @code{+}, @code{-}, +@code{*}, @code{/}, @code{^}. + +The following unary operators are available: @code{+}, @code{-}, +@code{(...)}. + +The following functions are available: +@table @var +@item sinh(x) +@item cosh(x) +@item tanh(x) +@item sin(x) +@item cos(x) +@item tan(x) +@item exp(x) +@item log(x) +@item squish(x) +@item gauss(x) +@item abs(x) +@item max(x, y) +@item min(x, y) +@item gt(x, y) +@item lt(x, y) +@item eq(x, y) +@item bits2qp(bits) +@item qp2bits(qp) +@end table + +The following constants are available: +@table @var +@item PI +@item E +@item iTex +@item pTex +@item tex +@item mv +@item fCode +@item iCount +@item mcVar +@item var +@item isI +@item isP +@item isB +@item avgQP +@item qComp +@item avgIITex +@item avgPITex +@item avgPPTex +@item avgBPTex +@item avgTex +@end table + +@c man end + +@ignore + +@setfilename ffmpeg +@settitle FFmpeg video converter + +@c man begin SEEALSO +ffserver(1), ffplay(1) and the HTML documentation of @file{ffmpeg}. +@c man end + +@c man begin AUTHOR +Fabrice Bellard +@c man end + +@end ignore + +@section Protocols + +The filename can be @file{-} to read from standard input or to write +to standard output. + +FFmpeg also handles many protocols specified with an URL syntax. + +Use 'ffmpeg -formats' to see a list of the supported protocols. + +The protocol @code{http:} is currently used only to communicate with +FFserver (see the FFserver documentation). When FFmpeg will be a +video player it will also be used for streaming :-) + +@chapter Tips + +@itemize +@item For streaming at very low bitrate application, use a low frame rate +and a small GOP size. 
This is especially true for RealVideo where +the Linux player does not seem to be very fast, so it can miss +frames. An example is: + +@example +ffmpeg -g 3 -r 3 -t 10 -b 50 -s qcif -f rv10 /tmp/b.rm +@end example + +@item The parameter 'q' which is displayed while encoding is the current +quantizer. The value 1 indicates that a very good quality could +be achieved. The value 31 indicates the worst quality. If q=31 appears +too often, it means that the encoder cannot compress enough to meet +your bitrate. You must either increase the bitrate, decrease the +frame rate or decrease the frame size. + +@item If your computer is not fast enough, you can speed up the +compression at the expense of the compression ratio. You can use +'-me zero' to speed up motion estimation, and '-intra' to disable +motion estimation completely (you have only I-frames, which means it +is about as good as JPEG compression). + +@item To have very low audio bitrates, reduce the sampling frequency +(down to 22050 Hz for MPEG audio, 22050 or 11025 for AC3). + +@item To have a constant quality (but a variable bitrate), use the option +'-qscale n' where 'n' is between 1 (excellent quality) and 31 (worst +quality). + +@item When converting video files, you can use the '-sameq' option which +uses the same quality factor in the encoder as in the decoder. +It allows almost lossless encoding. + +@end itemize + +@chapter Supported File Formats and Codecs + +You can use the @code{-formats} option to have an exhaustive list. 
+ +@section File Formats + +FFmpeg supports the following file formats through the @code{libavformat} +library: + +@multitable @columnfractions .4 .1 .1 .4 +@item Supported File Format @tab Encoding @tab Decoding @tab Comments +@item MPEG audio @tab X @tab X +@item MPEG-1 systems @tab X @tab X +@tab muxed audio and video +@item MPEG-2 PS @tab X @tab X +@tab also known as @code{VOB} file +@item MPEG-2 TS @tab @tab X +@tab also known as DVB Transport Stream +@item ASF@tab X @tab X +@item AVI@tab X @tab X +@item WAV@tab X @tab X +@item Macromedia Flash@tab X @tab X +@tab Only embedded audio is decoded. +@item FLV @tab X @tab X +@tab Macromedia Flash video files +@item Real Audio and Video @tab X @tab X +@item Raw AC3 @tab X @tab X +@item Raw MJPEG @tab X @tab X +@item Raw MPEG video @tab X @tab X +@item Raw PCM8/16 bits, mulaw/Alaw@tab X @tab X +@item Raw CRI ADX audio @tab X @tab X +@item Raw Shorten audio @tab @tab X +@item SUN AU format @tab X @tab X +@item NUT @tab X @tab X @tab NUT Open Container Format +@item QuickTime @tab X @tab X +@item MPEG-4 @tab X @tab X +@tab MPEG-4 is a variant of QuickTime. +@item Raw MPEG4 video @tab X @tab X +@item DV @tab X @tab X +@item 4xm @tab @tab X +@tab 4X Technologies format, used in some games. +@item Playstation STR @tab @tab X +@item Id RoQ @tab @tab X +@tab Used in Quake III, Jedi Knight 2, other computer games. +@item Interplay MVE @tab @tab X +@tab Format used in various Interplay computer games. +@item WC3 Movie @tab @tab X +@tab Multimedia format used in Origin's Wing Commander III computer game. +@item Sega FILM/CPK @tab @tab X +@tab Used in many Sega Saturn console games. +@item Westwood Studios VQA/AUD @tab @tab X +@tab Multimedia formats used in Westwood Studios games. +@item Id Cinematic (.cin) @tab @tab X +@tab Used in Quake II. +@item FLIC format @tab @tab X +@tab .fli/.flc files +@item Sierra VMD @tab @tab X +@tab Used in Sierra CD-ROM games. 
+@item Sierra Online @tab @tab X +@tab .sol files used in Sierra Online games. +@item Matroska @tab @tab X +@item Electronic Arts Multimedia @tab @tab X +@tab Used in various EA games; files have extensions like WVE and UV2. +@item Nullsoft Video (NSV) format @tab @tab X +@item ADTS AAC audio @tab X @tab X +@item Creative VOC @tab X @tab X @tab Created for the Sound Blaster Pro. +@item American Laser Games MM @tab @tab X +@tab Multimedia format used in games like Mad Dog McCree +@item AVS @tab @tab X +@tab Multimedia format used by the Creature Shock game. +@item Smacker @tab @tab X +@tab Multimedia format used by many games. +@end multitable + +@code{X} means that encoding (resp. decoding) is supported. + +@section Image Formats + +FFmpeg can read and write images for each frame of a video sequence. The +following image formats are supported: + +@multitable @columnfractions .4 .1 .1 .4 +@item Supported Image Format @tab Encoding @tab Decoding @tab Comments +@item PGM, PPM @tab X @tab X +@item PAM @tab X @tab X @tab PAM is a PNM extension with alpha support. +@item PGMYUV @tab X @tab X @tab PGM with U and V components in YUV 4:2:0 +@item JPEG @tab X @tab X @tab Progressive JPEG is not supported. +@item .Y.U.V @tab X @tab X @tab one raw file per component +@item animated GIF @tab X @tab X @tab Only uncompressed GIFs are generated. +@item PNG @tab X @tab X @tab 2 bit and 4 bit/pixel not supported yet. +@item SGI @tab X @tab X @tab SGI RGB image format +@end multitable + +@code{X} means that encoding (resp. decoding) is supported. 
+ +@section Video Codecs + +@multitable @columnfractions .4 .1 .1 .4 +@item Supported Codec @tab Encoding @tab Decoding @tab Comments +@item MPEG-1 video @tab X @tab X +@item MPEG-2 video @tab X @tab X +@item MPEG-4 @tab X @tab X @tab also known as DivX4/5 +@item MSMPEG4 V1 @tab X @tab X +@item MSMPEG4 V2 @tab X @tab X +@item MSMPEG4 V3 @tab X @tab X @tab also known as DivX3 +@item WMV7 @tab X @tab X +@item WMV8 @tab X @tab X @tab not completely working +@item H.261 @tab X @tab X +@item H.263(+) @tab X @tab X @tab also known as RealVideo 1.0 +@item H.264 @tab @tab X +@item RealVideo 1.0 @tab X @tab X +@item RealVideo 2.0 @tab X @tab X +@item MJPEG @tab X @tab X +@item lossless MJPEG @tab X @tab X +@item JPEG-LS @tab X @tab X @tab fourcc: MJLS, lossless and near-lossless is supported +@item Apple MJPEG-B @tab @tab X +@item Sunplus MJPEG @tab @tab X @tab fourcc: SP5X +@item DV @tab X @tab X +@item HuffYUV @tab X @tab X +@item FFmpeg Video 1 @tab X @tab X @tab experimental lossless codec (fourcc: FFV1) +@item FFmpeg Snow @tab X @tab X @tab experimental wavelet codec (fourcc: SNOW) +@item Asus v1 @tab X @tab X @tab fourcc: ASV1 +@item Asus v2 @tab X @tab X @tab fourcc: ASV2 +@item Creative YUV @tab @tab X @tab fourcc: CYUV +@item Sorenson Video 1 @tab X @tab X @tab fourcc: SVQ1 +@item Sorenson Video 3 @tab @tab X @tab fourcc: SVQ3 +@item On2 VP3 @tab @tab X @tab still experimental +@item Theora @tab @tab X @tab still experimental +@item Intel Indeo 3 @tab @tab X +@item FLV @tab X @tab X @tab Sorenson H.263 used in Flash +@item ATI VCR1 @tab @tab X @tab fourcc: VCR1 +@item ATI VCR2 @tab @tab X @tab fourcc: VCR2 +@item Cirrus Logic AccuPak @tab @tab X @tab fourcc: CLJR +@item 4X Video @tab @tab X @tab Used in certain computer games. +@item Sony Playstation MDEC @tab @tab X +@item Id RoQ @tab @tab X @tab Used in Quake III, Jedi Knight 2, other computer games. +@item Xan/WC3 @tab @tab X @tab Used in Wing Commander III .MVE files. 
+@item Interplay Video @tab @tab X @tab Used in Interplay .MVE files. +@item Apple Animation @tab @tab X @tab fourcc: 'rle ' +@item Apple Graphics @tab @tab X @tab fourcc: 'smc ' +@item Apple Video @tab @tab X @tab fourcc: rpza +@item Apple QuickDraw @tab @tab X @tab fourcc: qdrw +@item Cinepak @tab @tab X +@item Microsoft RLE @tab @tab X +@item Microsoft Video-1 @tab @tab X +@item Westwood VQA @tab @tab X +@item Id Cinematic Video @tab @tab X @tab Used in Quake II. +@item Planar RGB @tab @tab X @tab fourcc: 8BPS +@item FLIC video @tab @tab X +@item Duck TrueMotion v1 @tab @tab X @tab fourcc: DUCK +@item Duck TrueMotion v2 @tab @tab X @tab fourcc: TM20 +@item VMD Video @tab @tab X @tab Used in Sierra VMD files. +@item MSZH @tab @tab X @tab Part of LCL +@item ZLIB @tab X @tab X @tab Part of LCL, encoder experimental +@item TechSmith Camtasia @tab @tab X @tab fourcc: TSCC +@item IBM Ultimotion @tab @tab X @tab fourcc: ULTI +@item Miro VideoXL @tab @tab X @tab fourcc: VIXL +@item QPEG @tab @tab X @tab fourccs: QPEG, Q1.0, Q1.1 +@item LOCO @tab @tab X @tab +@item Winnov WNV1 @tab @tab X @tab +@item Autodesk Animator Studio Codec @tab @tab X @tab fourcc: AASC +@item Fraps FPS1 @tab @tab X @tab +@item CamStudio @tab @tab X @tab fourcc: CSCD +@item American Laser Games Video @tab @tab X @tab Used in games like Mad Dog McCree +@item ZMBV @tab @tab X @tab +@item AVS Video @tab @tab X @tab Video encoding used by the Creature Shock game. +@item Smacker Video @tab @tab X @tab Video encoding used in Smacker. +@item RTjpeg @tab @tab X @tab Video encoding used in NuppelVideo files. +@item KMVC @tab @tab X @tab Codec used in Worms games. +@end multitable + +@code{X} means that encoding (resp. decoding) is supported. + +See @url{http://mplayerhq.hu/~michael/codec-features.html} to +get a precise comparison of the FFmpeg MPEG-4 codec compared to +other implementations. 
+ +@section Audio Codecs + +@multitable @columnfractions .4 .1 .1 .1 .7 +@item Supported Codec @tab Encoding @tab Decoding @tab Comments +@item MPEG audio layer 2 @tab IX @tab IX +@item MPEG audio layer 1/3 @tab IX @tab IX +@tab MP3 encoding is supported through the external library LAME. +@item AC3 @tab IX @tab IX +@tab liba52 is used internally for decoding. +@item Vorbis @tab X @tab X +@tab Supported through the external library libvorbis. +@item WMA V1/V2 @tab @tab X +@item AAC @tab X @tab X +@tab Supported through the external library libfaac/libfaad. +@item Microsoft ADPCM @tab X @tab X +@item MS IMA ADPCM @tab X @tab X +@item QT IMA ADPCM @tab @tab X +@item 4X IMA ADPCM @tab @tab X +@item G.726 ADPCM @tab X @tab X +@item Duck DK3 IMA ADPCM @tab @tab X +@tab Used in some Sega Saturn console games. +@item Duck DK4 IMA ADPCM @tab @tab X +@tab Used in some Sega Saturn console games. +@item Westwood Studios IMA ADPCM @tab @tab X +@tab Used in Westwood Studios games like Command and Conquer. +@item SMJPEG IMA ADPCM @tab @tab X +@tab Used in certain Loki game ports. +@item CD-ROM XA ADPCM @tab @tab X +@item CRI ADX ADPCM @tab X @tab X +@tab Used in Sega Dreamcast games. +@item Electronic Arts ADPCM @tab @tab X +@tab Used in various EA titles. +@item Creative ADPCM @tab @tab X +@tab 16 -> 4, 8 -> 4, 8 -> 3, 8 -> 2 +@item RA144 @tab @tab X +@tab Real 14400 bit/s codec +@item RA288 @tab @tab X +@tab Real 28800 bit/s codec +@item RADnet @tab X @tab IX +@tab Real low bitrate AC3 codec, liba52 is used for decoding. +@item AMR-NB @tab X @tab X +@tab Supported through an external library. +@item AMR-WB @tab X @tab X +@tab Supported through an external library. +@item DV audio @tab @tab X +@item Id RoQ DPCM @tab @tab X +@tab Used in Quake III, Jedi Knight 2, other computer games. +@item Interplay MVE DPCM @tab @tab X +@tab Used in various Interplay computer games. +@item Xan DPCM @tab @tab X +@tab Used in Origin's Wing Commander IV AVI files. 
+@item Sierra Online DPCM @tab @tab X +@tab Used in Sierra Online game audio files. +@item Apple MACE 3 @tab @tab X +@item Apple MACE 6 @tab @tab X +@item FLAC lossless audio @tab @tab X +@item Shorten lossless audio @tab @tab X +@item Apple lossless audio @tab @tab X +@tab QuickTime fourcc 'alac' +@item FFmpeg Sonic @tab X @tab X +@tab experimental lossy/lossless codec +@item Qdesign QDM2 @tab @tab X +@tab there are still some distortions +@item Real COOK @tab @tab X +@tab All versions except 5.1 are supported +@item DSP Group TrueSpeech @tab @tab X +@item True Audio (TTA) @tab @tab X +@item Smacker Audio @tab @tab X +@end multitable + +@code{X} means that encoding (resp. decoding) is supported. + +@code{I} means that an integer-only version is available, too (ensures high +performance on systems without hardware floating point support). + +@chapter Platform Specific information + +@section Linux + +FFmpeg should be compiled with at least GCC 2.95.3. GCC 3.2 is the +preferred compiler now for FFmpeg. All future optimizations will depend on +features only found in GCC 3.2. + +@section BSD + +BSD make will not build FFmpeg, you need to install and use GNU Make +(@file{gmake}). + +@section Windows + +@subsection Native Windows compilation + +@itemize +@item Install the current versions of MSYS and MinGW from +@url{http://www.mingw.org/}. You can find detailed installation +instructions in the download section and the FAQ. + +@item If you want to test the FFplay, also download +the MinGW development library of SDL 1.2.x +(@file{SDL-devel-1.2.x-mingw32.tar.gz}) from +@url{http://www.libsdl.org}. Unpack it in a temporary directory, and +unpack the archive @file{i386-mingw32msvc.tar.gz} in the MinGW tool +directory. Edit the @file{sdl-config} script so that it gives the +correct SDL directory when invoked. + +@item Extract the current version of FFmpeg. + +@item Start the MSYS shell (file @file{msys.bat}). 
+ +@item Change to the FFmpeg directory and follow + the instructions of how to compile FFmpeg (file +@file{INSTALL}). Usually, launching @file{./configure} and @file{make} +suffices. If you have problems using SDL, verify that +@file{sdl-config} can be launched from the MSYS command line. + +@item You can install FFmpeg in @file{Program Files/FFmpeg} by typing +@file{make install}. Don't forget to copy @file{SDL.dll} to the place +you launch @file{ffplay} from. + +@end itemize + +Notes: +@itemize + +@item The target @file{make wininstaller} can be used to create a +Nullsoft based Windows installer for FFmpeg and FFplay. @file{SDL.dll} +must be copied to the FFmpeg directory in order to build the +installer. + +@item By using @code{./configure --enable-shared} when configuring FFmpeg, +you can build @file{avcodec.dll} and @file{avformat.dll}. With +@code{make install} you install the FFmpeg DLLs and the associated +headers in @file{Program Files/FFmpeg}. + +@item Visual C++ compatibility: If you used @code{./configure --enable-shared} +when configuring FFmpeg, FFmpeg tries to use the Microsoft Visual +C++ @code{lib} tool to build @code{avcodec.lib} and +@code{avformat.lib}. With these libraries you can link your Visual C++ +code directly with the FFmpeg DLLs (see below). + +@end itemize + +@subsection Visual C++ compatibility + +FFmpeg will not compile under Visual C++ -- and it has too many +dependencies on the GCC compiler to make a port viable. However, +if you want to use the FFmpeg libraries in your own applications, +you can still compile those applications using Visual C++. An +important restriction to this is that you have to use the +dynamically linked versions of the FFmpeg libraries (i.e. the +DLLs), and you have to make sure that Visual-C++-compatible +import libraries are created during the FFmpeg build process. + +This description of how to use the FFmpeg libraries with Visual C++ is +based on Visual C++ 2005 Express Edition Beta 2. 
If you have a different +version, you might have to modify the procedures slightly. + +Here are the step-by-step instructions for building the FFmpeg libraries +so they can be used with Visual C++: + +@enumerate + +@item Install Visual C++ (if you haven't done so already). + +@item Install MinGW and MSYS as described above. + +@item Add a call to @file{vcvars32.bat} (which sets up the environment +variables for the Visual C++ tools) as the first line of +@file{msys.bat}. The standard location for @file{vcvars32.bat} is +@file{C:\Program Files\Microsoft Visual Studio 8\VC\bin\vcvars32.bat}, +and the standard location for @file{msys.bat} is +@file{C:\msys\1.0\msys.bat}. If this corresponds to your setup, add the +following line as the first line of @file{msys.bat}: + +@code{call "C:\Program Files\Microsoft Visual Studio 8\VC\bin\vcvars32.bat"} + +@item Start the MSYS shell (file @file{msys.bat}) and type @code{link.exe}. +If you get a help message with the command line options of @code{link.exe}, +this means your environment variables are set up correctly, the +Microsoft linker is on the path and will be used by FFmpeg to +create Visual-C++-compatible import libraries. + +@item Extract the current version of FFmpeg and change to the FFmpeg directory. + +@item Type the command +@code{./configure --enable-shared --disable-static --enable-memalign-hack} +to configure and, if that didn't produce any errors, +type @code{make} to build FFmpeg. + +@item The subdirectories @file{libavformat}, @file{libavcodec}, and +@file{libavutil} should now contain the files @file{avformat.dll}, +@file{avformat.lib}, @file{avcodec.dll}, @file{avcodec.lib}, +@file{avutil.dll}, and @file{avutil.lib}, respectively. Copy the three +DLLs to your System32 directory (typically @file{C:\Windows\System32}). 
+ +@end enumerate + +And here is how to use these libraries with Visual C++: + +@enumerate + +@item Create a new console application ("File / New / Project") and then +select "Win32 Console Application". On the appropriate page of the +Application Wizard, uncheck the "Precompiled headers" option. + +@item Write the source code for your application, or, for testing, just +copy the code from an existing sample application into the source file +that Visual C++ has already created for you. (Note that your source +file has to have a @code{.cpp} extension; otherwise, Visual C++ won't +compile the FFmpeg headers correctly because in C mode, it doesn't +recognize the @code{inline} keyword.) For example, you can copy +@file{output_example.c} from the FFmpeg distribution (but you will +have to make minor modifications so the code will compile under +C++, see below). + +@item Open the "Project / Properties" dialog box. In the "Configuration" +combo box, select "All Configurations" so that the changes you make will +affect both debug and release builds. In the tree view on the left hand +side, select "C/C++ / General", then edit the "Additional Include +Directories" setting to contain the complete paths to the +@file{libavformat}, @file{libavcodec}, and @file{libavutil} +subdirectories of your FFmpeg directory. Note that the directories have +to be separated using semicolons. Now select "Linker / General" from the +tree view and edit the "Additional Library Directories" setting to +contain the same three directories. + +@item Still in the "Project / Properties" dialog box, select "Linker / Input" +from the tree view, then add the files @file{avformat.lib}, +@file{avcodec.lib}, and @file{avutil.lib} to the end of the "Additional +Dependencies". Note that the names of the libraries have to be separated +using spaces. + +@item Now, select "C/C++ / Code Generation" from the tree view. Select +"Debug" in the "Configuration" combo box. 
Make sure that "Runtime +Library" is set to "Multi-threaded Debug DLL". Then, select "Release" in +the "Configuration" combo box and make sure that "Runtime Library" is +set to "Multi-threaded DLL". + +@item Click "OK" to close the "Project / Properties" dialog box and build +the application. Hopefully, it should compile and run cleanly. If you +used @file{output_example.c} as your sample application, you will get a +few compiler errors, but they are easy to fix. The first type of error +occurs because Visual C++ doesn't allow an @code{int} to be converted to +an @code{enum} without a cast. To solve the problem, insert the required +casts (this error occurs once for a @code{CodecID} and once for a +@code{CodecType}). The second type of error occurs because C++ requires +the return value of @code{malloc} to be cast to the exact type of the +pointer it is being assigned to. Visual C++ will complain that, for +example, @code{(void *)} is being assigned to @code{(uint8_t *)} without +an explicit cast. So insert an explicit cast in these places to silence +the compiler. The third type of error occurs because the @code{snprintf} +library function is called @code{_snprintf} under Visual C++. So just +add an underscore to fix the problem. With these changes, +@file{output_example.c} should compile under Visual C++, and the +resulting executable should produce valid video files. + +@end enumerate + +@subsection Cross compilation for Windows with Linux + +You must use the MinGW cross compilation tools available at +@url{http://www.mingw.org/}. + +Then configure FFmpeg with the following options: +@example +./configure --enable-mingw32 --cross-prefix=i386-mingw32msvc- +@end example +(you can change the cross-prefix according to the prefix chosen for the +MinGW tools). + +Then you can easily test FFmpeg with Wine +(@url{http://www.winehq.com/}). + +@section Mac OS X + +@section BeOS + +The configure script should guess the configuration itself. 
+Networking support is currently not finished. +errno issues fixed by Andrew Bachmann. + +Old stuff: + +François Revol - revol at free dot fr - April 2002 + +The configure script should guess the configuration itself, +however I still didn't test building on the net_server version of BeOS. + +FFserver is broken (needs poll() implementation). + +There are still issues with errno codes, which are negative in BeOS, and +that FFmpeg negates when returning. This ends up turning errors into +valid results, then crashes. +(To be fixed) + +@chapter Developers Guide + +@section API +@itemize +@item libavcodec is the library containing the codecs (both encoding and +decoding). Look at @file{libavcodec/apiexample.c} to see how to use it. + +@item libavformat is the library containing the file format handling (mux and +demux code for several formats). Look at @file{ffplay.c} to use it in a +player. See @file{output_example.c} to use it to generate audio or video +streams. + +@end itemize + +@section Integrating libavcodec or libavformat in your program + +You can integrate all the source code of the libraries to link them +statically to avoid any version problem. All you need is to provide a +'config.mak' and a 'config.h' in the parent directory. See the defines +generated by ./configure to understand what is needed. + +You can use libavcodec or libavformat in your commercial program, but +@emph{any patch you make must be published}. The best way to proceed is +to send your patches to the FFmpeg mailing list. 
+ +@node Coding Rules +@section Coding Rules + +FFmpeg is programmed in the ISO C90 language with a few additional +features from ISO C99, namely: +@itemize @bullet +@item +the @samp{inline} keyword; +@item +@samp{//} comments; +@item +designated struct initializers (@samp{struct s x = @{ .i = 17 @};}) +@item +compound literals (@samp{x = (struct s) @{ 17, 23 @};}) +@end itemize + +These features are supported by all compilers we care about, so we won't +accept patches to remove their use unless they absolutely don't impair +clarity and performance. + +All code must compile with GCC 2.95 and GCC 3.3. Currently, FFmpeg also +compiles with several other compilers, such as the Compaq ccc compiler +or Sun Studio 9, and we would like to keep it that way unless it would +be exceedingly involved. To ensure compatibility, please don't use any +additional C99 features or GCC extensions. Especially watch out for: +@itemize @bullet +@item +mixing statements and declarations; +@item +@samp{long long} (use @samp{int64_t} instead); +@item +@samp{__attribute__} not protected by @samp{#ifdef __GNUC__} or similar; +@item +GCC statement expressions (@samp{(x = (@{ int y = 4; y; @})}). +@end itemize + +Indent size is 4. +The presentation is the one specified by 'indent -i4 -kr -nut'. +The TAB character is forbidden outside of Makefiles as is any +form of trailing whitespace. Commits containing either will be +rejected by the CVS repository. + +Main priority in FFmpeg is simplicity and small code size (=less +bugs). + +Comments: Use the JavaDoc/Doxygen +format (see examples below) so that code documentation +can be generated automatically. All nontrivial functions should have a comment +above them explaining what the function does, even if it's just one sentence. +All structures and their member variables should be documented, too. +@example +/** + * @@file mpeg.c + * MPEG codec. + * @@author ... + */ + +/** + * Summary sentence. + * more text ... + * ... 
+ */ +typedef struct Foobar@{ + int var1; /**< var1 description */ + int var2; ///< var2 description + /** var3 description */ + int var3; +@} Foobar; + +/** + * Summary sentence. + * more text ... + * ... + * @@param my_parameter description of my_parameter + * @@return return value description + */ +int myfunc(int my_parameter) +... +@end example + +fprintf and printf are forbidden in libavformat and libavcodec, +please use av_log() instead. + +@node CVS Policy +@section CVS Policy + +@enumerate +@item + You must not commit code which breaks FFmpeg! (Meaning unfinished but + enabled code which breaks compilation or compiles but does not work or + breaks the regression tests) + You can commit unfinished stuff (for testing etc), but it must be disabled + (#ifdef etc) by default so it does not interfere with other developers' + work. +@item + You don't have to over-test things. If it works for you, and you think it + should work for others, then commit. If your code has problems + (portability, triggers compiler bugs, unusual environment etc) they will be + reported and eventually fixed. +@item + Do not commit unrelated changes together, split them into self-contained + pieces. +@item + Do not change behavior of the program (renaming options etc) without + first discussing it on the ffmpeg-devel mailing list. Do not remove + functionality from the code. Just improve! + + Note: Redundant code can be removed. +@item + Do not commit changes to the build system (Makefiles, configure script) + which change behavior, defaults etc, without asking first. The same + applies to compiler warning fixes, trivial looking fixes and to code + maintained by other developers. We usually have a reason for doing things + the way we do. Send your changes as patches to the ffmpeg-devel mailing + list, and if the code maintainers say OK, you may commit. This does not + apply to files you wrote and/or maintain. 
+@item + We refuse source indentation and other cosmetic changes if they are mixed + with functional changes, such commits will be rejected and removed. Every + developer has his own indentation style, you should not change it. Of course + if you (re)write something, you can use your own style, even though we would + prefer if the indentation throughout FFmpeg was consistent (Many projects + force a given indentation style - we don't.). If you really need to make + indentation changes (try to avoid this), separate them strictly from real + changes. + + NOTE: If you had to put if()@{ .. @} over a large (> 5 lines) chunk of code, + then either do NOT change the indentation of the inner part within (don't + move it to the right)! or do so in a separate commit +@item + Always fill out the commit log message. Describe in a few lines what you + changed and why. You can refer to mailing list postings if you fix a + particular bug. Comments such as "fixed!" or "Changed it." are unacceptable. +@item + If you apply a patch by someone else, include the name and email address in + the CVS log message. Since the ffmpeg-cvslog mailing list is publicly + archived you should add some SPAM protection to the email address. Send an + answer to ffmpeg-devel (or wherever you got the patch from) saying that + you applied the patch. +@item + Do NOT commit to code actively maintained by others without permission. Send + a patch to ffmpeg-devel instead. +@item + Subscribe to the ffmpeg-cvslog mailing list. The diffs of all CVS commits + are sent there and reviewed by all the other developers. Bugs and possible + improvements or general questions regarding commits are discussed there. We + expect you to react if problems with your code are uncovered. +@item + Update the documentation if you change behavior or add features. If you are + unsure how best to do this, send a patch to ffmpeg-devel, the documentation + maintainer(s) will review and commit your stuff. 
+@item + Revert a commit ONLY in case of a big blunder like committing something not + intended to be committed or committing a wrong file, the wrong version of a + patch, CVS policy violation or broken code and you are going to recommit the + right thing immediately. + + Never revert changes made a long time ago or buggy code. Fix it in the + normal way instead. +@item + Never write to unallocated memory, never write over the end of arrays, + always check values read from some untrusted source before using them + as array index or other risky things. +@item + Remember to check if you need to bump versions for the specific libav + parts (libavutil, libavcodec, libavformat) you are changing. You need + to change the version integer and the version string. + Incrementing the first component means no backward compatibility to + previous versions (e.g. removal of a function). + Incrementing the second component means backward compatible change + (e.g. addition of a function). + Incrementing the third component means a noteworthy binary compatible + change (e.g. encoder bug fix that matters for the decoder). +@item + If you add a new codec, remember to update the changelog, add it to + the supported codecs table in the documentation and bump the second + component of the @file{libavcodec} version number appropriately. If + it has a fourcc, add it to @file{libavformat/avienc.c}, even if it + is only a decoder. +@end enumerate + +We think our rules are not too hard. If you have comments, contact us. + +Note, these rules are mostly borrowed from the MPlayer project. + +@subsection Renaming/moving files or content of files + You CANNOT do that. Post a request for such a change to the mailing list. + Do NOT remove & readd a file - it will kill the changelog!!!! + +@section Submitting patches + +First, read the coding rules (@pxref{Coding Rules}) above if you have not done so yet. + +When you submit your patch, try to send a unified diff (diff '-up' +option). 
I cannot read other diffs :-) + +Also please do not submit patches which contain several unrelated changes. +Split them into individual self-contained patches; this makes reviewing +them much easier. + +Run the regression tests before submitting a patch so that you can +verify that there are no big problems. + +Patches should be posted as base64 encoded attachments (or any other +encoding which ensures that the patch won't be trashed during +transmission) to the ffmpeg-devel mailing list, see +@url{http://www1.mplayerhq.hu/mailman/listinfo/ffmpeg-devel} + +It also helps quite a bit if you tell us what the patch does (for example +'replaces lrint by lrintf'), and why (for example '*BSD isn't C99 compliant +and has no lrint()') + +We reply to all submitted patches and either apply or reject with some +explanation why, but sometimes we are quite busy so it can take a week or two. + +@section Regression tests + +Before submitting a patch (or committing to CVS), you should at least +test that you did not break anything. + +The regression tests build a synthetic video stream and a synthetic +audio stream. These are then encoded and decoded with all codecs or +formats. The CRC (or MD5) of each generated file is recorded in a +result file. A 'diff' is launched to compare the reference results and +the result file. + +The regression tests then go on to test the FFserver code with a +limited set of streams. It is important that this step runs correctly +as well. + +Run 'make test' to test all the codecs and formats. + +Run 'make fulltest' to test all the codecs, formats and FFserver. + +[Of course, some patches may change the results of the regression tests. In +this case, the reference results of the regression tests shall be modified +accordingly]. 
+ +@bye diff --git a/mpeg4/src/doc/ffmpeg_powerpc_performance_evaluation_howto.txt b/mpeg4/src/doc/ffmpeg_powerpc_performance_evaluation_howto.txt new file mode 100644 index 0000000000000000000000000000000000000000..a331212f9e38d5818ac4dd8daf42c5d48ecaa84b --- /dev/null +++ b/mpeg4/src/doc/ffmpeg_powerpc_performance_evaluation_howto.txt @@ -0,0 +1,163 @@ +FFmpeg & evaluating performance on the PowerPC Architecture HOWTO + +(c) 2003-2004 Romain Dolbeau + + + +I - Introduction + +The PowerPC architecture and its SIMD extension AltiVec offer some +interesting tools to evaluate performance and improve the code. +This document tries to explain how to use those tools with FFmpeg. + +The architecture itself offers two ways to evaluate the performance of +a given piece of code: + +1) The Time Base Registers (TBL) +2) The Performance Monitor Counter Registers (PMC) + +The first ones are always available, always active, but they're not very +accurate: the registers increment by one every four *bus* cycles. On +my 667 MHz tiBook (ppc7450), this means once every twenty *processor* +cycles. So we won't use that. + +The PMC are much more useful: not only can they report cycle-accurate +timing, but they can also be used to monitor many other parameters, +such as the number of AltiVec stalls for every kind of instruction, +or instruction cache misses. The downside is that not all processors +support the PMC (all G3, all G4 and the 970 do support them), and +they're inactive by default - you need to activate them with a +dedicated tool. Also, the number of available PMC depends on the +processor: the various 604 have 2, the various 75x (aka. G3) have 4, +and the various 74xx (aka G4) have 6. + +*WARNING*: The PowerPC 970 is not very well documented, and its PMC +registers are 64 bits wide. To properly notify the code, you *must* +tune for the 970 (using --tune=970), or the code will assume 32 bit +registers.
+ + +II - Enabling FFmpeg PowerPC performance support + +This needs to be done by hand. First, you need to configure FFmpeg as +usual, but add the "--powerpc-perf-enable" option. For instance: + +##### +./configure --prefix=/usr/local/ffmpeg-cvs --cc=gcc-3.3 --tune=7450 --powerpc-perf-enable +##### + +This will configure FFmpeg to install inside /usr/local/ffmpeg-cvs, +compiling with gcc-3.3 (you should try to use this one or a newer +gcc), and tuning for the PowerPC 7450 (i.e. the newer G4; as a rule of +thumb, those at 550 MHz and more). It will also enable the PMC. + +You may also edit the file "config.h" to enable the following line: + +##### +// #define ALTIVEC_USE_REFERENCE_C_CODE 1 +##### + +If you enable this line, then the code will not make use of AltiVec, +but will use the reference C code instead. This is useful to compare +performance between two versions of the code. + +Also, the number of enabled PMC is defined in "libavcodec/ppc/dsputil_ppc.h": + +##### +#define POWERPC_NUM_PMC_ENABLED 4 +##### + +If you have a G4 CPU, you can enable all 6 PMC. DO NOT enable more +PMC than available on your CPU! + +Then, simply compile FFmpeg as usual (make && make install). + + + +III - Using FFmpeg PowerPC performance support + +This FFmpeg can be used exactly as usual. But before exiting, FFmpeg +will dump a per-function report that looks like this: + +##### +PowerPC performance report + Values are from the PMC registers, and represent whatever the + registers are set to record. + Function "gmc1_altivec" (pmc1): + min: 231 + max: 1339867 + avg: 558.25 (255302) + Function "gmc1_altivec" (pmc2): + min: 93 + max: 2164 + avg: 267.31 (255302) + Function "gmc1_altivec" (pmc3): + min: 72 + max: 1987 + avg: 276.20 (255302) +(...) +##### + +In this example, PMC1 was set to record CPU cycles, PMC2 was set to +record AltiVec Permute Stall Cycles, and PMC3 was set to record AltiVec +Issue Stalls.
+ +The function "gmc1_altivec" was monitored 255302 times, and the +minimum execution time was 231 processor cycles. The max and average +aren't much use, as it's very likely the OS interrupted execution for +reasons of its own :-( + +With the exact same settings and source file, but using the reference C +code we get: + +##### +PowerPC performance report + Values are from the PMC registers, and represent whatever the + registers are set to record. + Function "gmc1_altivec" (pmc1): + min: 592 + max: 2532235 + avg: 962.88 (255302) + Function "gmc1_altivec" (pmc2): + min: 0 + max: 33 + avg: 0.00 (255302) + Function "gmc1_altivec" (pmc3): + min: 0 + max: 350 + avg: 0.03 (255302) +(...) +##### + +592 cycles, so the fastest AltiVec execution is about 2.5x faster than +the fastest C execution in this example. It's not perfect but it's not +bad (well I wrote this function so I can't say otherwise :-). + +Once you have that kind of report, you can try to improve things by +finding what goes wrong and fixing it; in the example above, one +should try to diminish the number of AltiVec stalls, as this *may* +improve performance. + + + +IV) Enabling the PMC in Mac OS X + +This is easy. Use "MONster" and "monster". Those tools come from +Apple's CHUD package, and can be found hidden in the developer web +site & FTP site. "MONster" is the graphical application, use it to +generate a config file specifying what each register should +monitor. Then use the command-line application "monster" to use that +config file, and enjoy the results. + +Note that "MONster" can be used for many other things, but it's +documented by Apple, it's not my subject. + + + +V) Enabling the PMC on Linux + +I don't know how to do it, sorry :-) Any ideas are very much welcome.
+ +-- +Romain Dolbeau + diff --git a/mpeg4/src/doc/ffplay-doc.texi b/mpeg4/src/doc/ffplay-doc.texi new file mode 100644 index 0000000000000000000000000000000000000000..f3e9dc3363769ad6f0b712abc394a83a8cc5974d --- /dev/null +++ b/mpeg4/src/doc/ffplay-doc.texi @@ -0,0 +1,110 @@ +\input texinfo @c -*- texinfo -*- + +@settitle FFplay Documentation +@titlepage +@sp 7 +@center @titlefont{FFplay Documentation} +@sp 3 +@end titlepage + + +@chapter Introduction + +@c man begin DESCRIPTION +FFplay is a very simple and portable media player using the FFmpeg +libraries and the SDL library. It is mostly used as a testbed for the +various FFmpeg APIs. +@c man end + +@chapter Invocation + +@section Syntax +@example +@c man begin SYNOPSIS +ffplay [options] @file{input_file} +@c man end +@end example + +@c man begin OPTIONS +@section Main options + +@table @option +@item -h +show help +@item -x width +force displayed width +@item -y height +force displayed height +@item -an +disable audio +@item -vn +disable video +@item -nodisp +disable graphical display +@item -f fmt +force format +@item -img img_fmt +This option is used to force a given image format +when playing image sequences. Example: +@example +ffplay -img pgmyuv tests/vsynth1/%d.pgm +@end example +@end table + +@section Advanced options +@table @option +@item -stats +Show the stream duration, the codec parameters, the current position in +the stream and the audio/video synchronisation drift. +@item -rtp_tcp +Force RTP/TCP protocol usage instead of RTP/UDP. It is only meaningful +if you are streaming with the RTSP protocol. +@item -sync type +Set the master clock to audio (@code{type=audio}), video +(@code{type=video}) or external (@code{type=ext}). Default is audio. The +master clock is used to control audio-video synchronization. Most media +players use audio as master clock, but in some cases (streaming or high +quality broadcast) it is necessary to change that. This option is mainly +used for debugging purposes. 
+@end table + +@section While playing + +@table @key +@item q, ESC +quit + +@item f +toggle full screen + +@item p, SPC +pause + +@item a +cycle audio channel + +@item v +cycle video channel + +@item w +show audio waves +@end table + +@c man end + +@ignore + +@setfilename ffplay +@settitle FFplay media player + +@c man begin SEEALSO +ffmpeg(1), ffserver(1) and the html documentation of @file{ffmpeg}. +@c man end + +@c man begin AUTHOR +Fabrice Bellard +@c man end + +@end ignore + +@bye diff --git a/mpeg4/src/doc/ffserver-doc.texi b/mpeg4/src/doc/ffserver-doc.texi new file mode 100644 index 0000000000000000000000000000000000000000..ed67bb6c04205fe71307857fe76f4dd118022f93 --- /dev/null +++ b/mpeg4/src/doc/ffserver-doc.texi @@ -0,0 +1,224 @@ +\input texinfo @c -*- texinfo -*- + +@settitle FFserver Documentation +@titlepage +@sp 7 +@center @titlefont{FFserver Documentation} +@sp 3 +@end titlepage + + +@chapter Introduction + +@c man begin DESCRIPTION +FFserver is a streaming server for both audio and video. It supports +several live feeds, streaming from files and time shifting on live feeds +(you can seek to positions in the past on each live feed, provided you +specify a big enough feed storage in ffserver.conf). + +This documentation covers only the streaming aspects of ffserver / +ffmpeg. All questions about parameters for ffmpeg, codec questions, +etc. are not covered here. Read @file{ffmpeg-doc.html} for more +information. +@c man end + +@chapter QuickStart + +[Contributed by Philip Gladstone, philip-ffserver at gladstonefamily dot net] + +@section What can this do? + +When properly configured and running, you can capture video and audio in real +time from a suitable capture card, and stream it out over the Internet to +either Windows Media Player or RealAudio player (with some restrictions). + +It can also stream from files, though that is currently broken. Very often, a +web server can be used to serve up the files just as well. 
+ +It can stream prerecorded video from .ffm files, though it is somewhat tricky +to make it work correctly. + +@section What do I need? + +I use Linux on a 900MHz Duron with a cheapo Bt848 based TV capture card. I'm +using stock Linux 2.4.17 with the stock drivers. [Actually that isn't true, +I needed some special drivers for my motherboard-based sound card.] + +I understand that FreeBSD systems work just fine as well. + +@section How do I make it work? + +First, build the kit. It *really* helps to have installed LAME first. Then when +you run the ffserver ./configure, make sure that you have the --enable-mp3lame +flag turned on. + +LAME is important as it allows for streaming audio to Windows Media Player. +Don't ask why the other audio types do not work. + +As a simple test, just run the following two command lines (assuming that you +have a V4L video capture card): + +@example +./ffserver -f doc/ffserver.conf & +./ffmpeg http://localhost:8090/feed1.ffm +@end example + +At this point you should be able to go to your Windows machine and fire up +Windows Media Player (WMP). Go to Open URL and enter + +@example + http://:8090/test.asf +@end example + +You should (after a short delay) see video and hear audio. + +WARNING: trying to stream test1.mpg doesn't work with WMP as it tries to +transfer the entire file before starting to play. +The same is true of AVI files. + +@section What happens next? + +You should edit the ffserver.conf file to suit your needs (in terms of +frame rates etc). Then install ffserver and ffmpeg, write a script to start +them up, and off you go. + +@section Troubleshooting + +@subsection I don't hear any audio, but video is fine. + +Maybe you didn't install LAME, or got your ./configure statement wrong. Check +the ffmpeg output to see if a line referring to MP3 is present. If not, then +your configuration was incorrect. If it is, then maybe your wiring is not +set up correctly. 
Maybe the sound card is not getting data from the right +input source. Maybe you have a really awful audio interface (like I do) +that only captures in stereo and also requires that one channel be flipped. +If you are one of these people, then export 'AUDIO_FLIP_LEFT=1' before +starting ffmpeg. + +@subsection The audio and video lose sync after a while. + +Yes, they do. + +@subsection After a long while, the video update rate goes way down in WMP. + +Yes, it does. Who knows why? + +@subsection WMP 6.4 behaves differently to WMP 7. + +Yes, it does. Any thoughts on this would be gratefully received. These +differences extend to embedding WMP into a web page. [There are two +object IDs that you can use: The old one, which does not play well, and +the new one, which does (both tested on the same system). However, +I suspect that the new one is not available unless you have installed WMP 7]. + +@section What else can it do? + +You can replay video from .ffm files that was recorded earlier. +However, there are a number of caveats, including the fact that the +ffserver parameters must match the original parameters used to record the +file. If they do not, then ffserver deletes the file before recording into it. +(Now that I write this, it seems broken). + +You can fiddle with many of the codec choices and encoding parameters, and +there are a bunch more parameters that you cannot control. Post a message +to the mailing list if there are some 'must have' parameters. Look in +ffserver.conf for a list of the currently available controls. + +It will automatically generate the ASX or RAM files that are often used +in browsers. These files are actually redirections to the underlying ASF +or RM file. The reason for this is that the browser often fetches the +entire file before starting up the external viewer. The redirection files +are very small and can be transferred quickly.
[The stream itself is +often 'infinite' and thus the browser tries to download it and never +finishes.] + +@section Tips + +* When you connect to a live stream, most players (WMP, RA, etc) want to +buffer a certain number of seconds of material so that they can display the +signal continuously. However, ffserver (by default) starts sending data +in realtime. This means that there is a pause of a few seconds while the +buffering is being done by the player. The good news is that this can be +cured by adding a '?buffer=5' to the end of the URL. This means that the +stream should start 5 seconds in the past -- and so the first 5 seconds +of the stream are sent as fast as the network will allow. It will then +slow down to real time. This noticeably improves the startup experience. + +You can also add a 'Preroll 15' statement into the ffserver.conf that will +add the 15 second prebuffering on all requests that do not otherwise +specify a time. In addition, ffserver will skip frames until a key_frame +is found. This further reduces the startup delay by not transferring data +that will be discarded. + +* You may want to adjust the MaxBandwidth in the ffserver.conf to limit +the amount of bandwidth consumed by live streams. + +@section Why does the ?buffer / Preroll stop working after a time? + +It turns out that (on my machine at least) the number of frames successfully +grabbed is marginally less than the number that ought to be grabbed. This +means that the timestamp in the encoded data stream gets behind realtime. +This means that if you say 'Preroll 10', then when the stream gets 10 +or more seconds behind, there is no Preroll left. + +Fixing this requires a change in the internals of how timestamps are +handled. + +@section Does the @code{?date=} stuff work. + +Yes (subject to the limitation outlined above). Also note that whenever you +start ffserver, it deletes the ffm file (if any parameters have changed), +thus wiping out what you had recorded before. 
+ +The format of the @code{?date=xxxxxx} is fairly flexible. You should use one +of the following formats (the 'T' is literal): + +@example +* YYYY-MM-DDTHH:MM:SS (localtime) +* YYYY-MM-DDTHH:MM:SSZ (UTC) +@end example + +You can omit the YYYY-MM-DD, and then it refers to the current day. However +note that @samp{?date=16:00:00} refers to 16:00 on the current day -- this +may be in the future and so is unlikely to be useful. + +You use this by adding the ?date= to the end of the URL for the stream. +For example: @samp{http://localhost:8090/test.asf?date=2002-07-26T23:05:00}. + +@chapter Invocation +@section Syntax +@example +@c man begin SYNOPSIS +ffserver [options] +@c man end +@end example + +@section Options +@c man begin OPTIONS +@table @option +@item -L +Print the license. +@item -h +Print the help. +@item -f configfile +Use @file{configfile} instead of @file{/etc/ffserver.conf}. +@end table +@c man end + +@ignore + +@setfilename ffserver +@settitle FFserver video server + +@c man begin SEEALSO +ffmpeg(1), ffplay(1), the @file{ffmpeg/doc/ffserver.conf} example and +the HTML documentation of @file{ffmpeg}. +@c man end + +@c man begin AUTHOR +Fabrice Bellard +@c man end + +@end ignore + +@bye diff --git a/mpeg4/src/doc/ffserver.conf b/mpeg4/src/doc/ffserver.conf new file mode 100644 index 0000000000000000000000000000000000000000..a3b3ff4129dac60ada6c5e4d603e28edf962349b --- /dev/null +++ b/mpeg4/src/doc/ffserver.conf @@ -0,0 +1,349 @@ +# Port on which the server is listening. You must select a different +# port from your standard HTTP web server if it is running on the same +# computer. +Port 8090 + +# Address on which the server is bound. Only useful if you have +# several network interfaces. +BindAddress 0.0.0.0 + +# Number of simultaneous requests that can be handled. Since FFServer +# is very fast, it is more likely that you will want to leave this high +# and use MaxBandwidth, below.
+MaxClients 1000 + +# This is the maximum amount of kbit/sec that you are prepared to +# consume when streaming to clients. +MaxBandwidth 1000 + +# Access log file (uses standard Apache log file format) +# '-' is the standard output. +CustomLog - + +# Remove this line if you want to launch ffserver as a daemon. +NoDaemon + + +################################################################## +# Definition of the live feeds. Each live feed contains one video +# and/or audio sequence coming from an ffmpeg encoder or another +# ffserver. This sequence may be encoded simultaneously with several +# codecs at several resolutions. + + + +# You must use 'ffmpeg' to send a live feed to ffserver. In this +# example, you can type: +# +# ffmpeg http://localhost:8090/feed1.ffm + +# ffserver can also do time shifting. It means that it can stream any +# previously recorded live stream. The request should contain: +# "http://xxxx?date=[YYYY-MM-DDT][[HH:]MM:]SS[.m...]". You must specify +# a path where the feed is stored on disk. You also specify the +# maximum size of the feed, where zero means unlimited. Default: +# File=/tmp/feed_name.ffm FileMaxSize=5M +File /tmp/feed1.ffm +FileMaxSize 200K + +# You could specify +# ReadOnlyFile /saved/specialvideo.ffm +# This marks the file as readonly and it will not be deleted or updated. + +# Specify launch in order to start ffmpeg automatically. +# First ffmpeg must be defined with an appropriate path if needed, +# after that options can follow, but avoid adding the http:// field +#Launch ffmpeg + +# Only allow connections from localhost to the feed. +ACL allow 127.0.0.1 + + + + +################################################################## +# Now you can define each stream which will be generated from the +# original audio and video stream. Each format has a filename (here +# 'test1.mpg'). FFServer will send this stream when answering a +# request containing this filename.
+ + + +# coming from live feed 'feed1' +Feed feed1.ffm + +# Format of the stream : you can choose among: +# mpeg : MPEG-1 multiplexed video and audio +# mpegvideo : only MPEG-1 video +# mp2 : MPEG-2 audio (use AudioCodec to select layer 2 and 3 codec) +# ogg : Ogg format (Vorbis audio codec) +# rm : RealNetworks-compatible stream. Multiplexed audio and video. +# ra : RealNetworks-compatible stream. Audio only. +# mpjpeg : Multipart JPEG (works with Netscape without any plugin) +# jpeg : Generate a single JPEG image. +# asf : ASF compatible streaming (Windows Media Player format). +# swf : Macromedia Flash compatible stream +# avi : AVI format (MPEG-4 video, MPEG audio sound) +# master : special ffmpeg stream used to duplicate a server +Format mpeg + +# Bitrate for the audio stream. Codecs usually support only a few +# different bitrates. +AudioBitRate 32 + +# Number of audio channels: 1 = mono, 2 = stereo +AudioChannels 1 + +# Sampling frequency for audio. When using low bitrates, you should +# lower this frequency to 22050 or 11025. The supported frequencies +# depend on the selected audio codec. +AudioSampleRate 44100 + +# Bitrate for the video stream +VideoBitRate 64 + +# Ratecontrol buffer size +VideoBufferSize 40 + +# Number of frames per second +VideoFrameRate 3 + +# Size of the video frame: WxH (default: 160x128) +# The following abbreviations are defined: sqcif, qcif, cif, 4cif +VideoSize 160x128 + +# Transmit only intra frames (useful for low bitrates, but kills frame rate). +#VideoIntraOnly + +# If non-intra only, an intra frame is transmitted every VideoGopSize +# frames. Video synchronization can only begin at an intra frame. +VideoGopSize 12 + +# More MPEG-4 parameters +# VideoHighQuality +# Video4MotionVector + +# Choose your codecs: +#AudioCodec mp2 +#VideoCodec mpeg1video + +# Suppress audio +#NoAudio + +# Suppress video +#NoVideo + +#VideoQMin 3 +#VideoQMax 31 + +# Set this to the number of seconds backwards in time to start. 
Note that +# most players will buffer 5-10 seconds of video, and also you need to allow +# for a keyframe to appear in the data stream. +#Preroll 15 + +# ACL: + +# You can allow ranges of addresses (or single addresses) +#ACL ALLOW + +# You can deny ranges of addresses (or single addresses) +#ACL DENY + +# You can repeat the ACL allow/deny as often as you like. It is on a per +# stream basis. The first match defines the action. If there are no matches, +# then the default is the inverse of the last ACL statement. +# +# Thus 'ACL allow localhost' only allows access from localhost. +# 'ACL deny 1.0.0.0 1.255.255.255' would deny the whole of network 1 and +# allow everybody else. + + + + +################################################################## +# Example streams + + +# Multipart JPEG + +# +#Feed feed1.ffm +#Format mpjpeg +#VideoFrameRate 2 +#VideoIntraOnly +#NoAudio +#Strict -1 +# + + +# Single JPEG + +# +#Feed feed1.ffm +#Format jpeg +#VideoFrameRate 2 +#VideoIntraOnly +##VideoSize 352x240 +#NoAudio +#Strict -1 +# + + +# Flash + +# +#Feed feed1.ffm +#Format swf +#VideoFrameRate 2 +#VideoIntraOnly +#NoAudio +# + + +# ASF compatible + + +Feed feed1.ffm +Format asf +VideoFrameRate 15 +VideoSize 352x240 +VideoBitRate 256 +VideoBufferSize 40 +VideoGopSize 30 +AudioBitRate 64 +StartSendOnKey + + + +# MP3 audio + +# +#Feed feed1.ffm +#Format mp2 +#AudioCodec mp3 +#AudioBitRate 64 +#AudioChannels 1 +#AudioSampleRate 44100 +#NoVideo +# + + +# Ogg Vorbis audio + +# +#Feed feed1.ffm +#Title "Stream title" +#AudioBitRate 64 +#AudioChannels 2 +#AudioSampleRate 44100 +#NoVideo +# + + +# Real with audio only at 32 kbits + +# +#Feed feed1.ffm +#Format rm +#AudioBitRate 32 +#NoVideo +#NoAudio +# + + +# Real with audio and video at 64 kbits + +# +#Feed feed1.ffm +#Format rm +#AudioBitRate 32 +#VideoBitRate 128 +#VideoFrameRate 25 +#VideoGopSize 25 +#NoAudio +# + + +################################################################## +# A stream coming from a file: you only 
need to set the input +# filename and optionally a new format. Supported conversions: +# AVI -> ASF + +# +#File "/usr/local/httpd/htdocs/tlive.rm" +#NoAudio +# + +# +#File "/usr/local/httpd/htdocs/test.asf" +#NoAudio +#Author "Me" +#Copyright "Super MegaCorp" +#Title "Test stream from disk" +#Comment "Test comment" +# + + +################################################################## +# RTSP examples +# +# You can access this stream with the RTSP URL: +# rtsp://localhost:5454/test1-rtsp.mpg +# +# A non-standard RTSP redirector is also created. Its URL is: +# http://localhost:8090/test1-rtsp.rtsp + +# +#Format rtp +#File "/usr/local/httpd/htdocs/test1.mpg" +# + + +################################################################## +# SDP/multicast examples +# +# If you want to send your stream in multicast, you must set the +# multicast address with MulticastAddress. The port and the TTL can +# also be set. +# +# An SDP file is automatically generated by ffserver by adding the +# 'sdp' extension to the stream name (here +# http://localhost:8090/test1-sdp.sdp). You should usually give this +# file to your player to play the stream. +# +# The 'NoLoop' option can be used to avoid looping when the stream is +# terminated. 
+ +# +#Format rtp +#File "/usr/local/httpd/htdocs/test1.mpg" +#MulticastAddress 224.124.0.1 +#MulticastPort 5000 +#MulticastTTL 16 +#NoLoop +# + + +################################################################## +# Special streams + +# Server status + + +Format status + +# Only allow local people to get the status +ACL allow localhost +ACL allow 192.168.0.0 192.168.255.255 + +#FaviconURL http://pond1.gladstonefamily.net:8080/favicon.ico + + + +# Redirect index.html to the appropriate site + + +URL http://www.ffmpeg.org/ + + + diff --git a/mpeg4/src/doc/hooks.texi b/mpeg4/src/doc/hooks.texi new file mode 100644 index 0000000000000000000000000000000000000000..a9c1255ec3117f47810c5553c4889319dde5b689 --- /dev/null +++ b/mpeg4/src/doc/hooks.texi @@ -0,0 +1,49 @@ +\input texinfo @c -*- texinfo -*- + +@settitle Video Hook Documentation +@titlepage +@sp 7 +@center @titlefont{Video Hook Documentation} +@sp 3 +@end titlepage + + +@chapter Introduction + + +The video hook functionality is designed (mostly) for live video. It allows +the video to be modified or examined between the decoder and the encoder. + +Any number of hook modules can be placed inline, and they are run in the +order that they were specified on the ffmpeg command line. + +Three modules are provided and are described below. They are all intended to +be used as a base for your own modules. + +Modules are loaded using the -vhook option to ffmpeg. The value of this parameter +is a space separated list of arguments. The first is the module name, and the rest +are passed as arguments to the Configure function of the module. + +@section null.c + +This does nothing. Actually it converts the input image to RGB24 and then converts +it back again. This is meant as a sample that you can use to test your setup. + +@section fish.c + +This implements a 'fish detector'. Essentially it converts the image into HSV +space and tests whether more than a certain percentage of the pixels fall into +a specific HSV cuboid. 
If so, then the image is saved into a file for processing +by other bits of code. + +Why use HSV? It turns out that HSV cuboids represent a more compact range of +colors than would an RGB cuboid. + +@section imlib2.c + +This allows a caption to be placed onto each frame. It supports inserting the +time and date. By using the imlib functions, it would be easy to add your own +graphical logo, add a frame/border, etc. + + +@bye diff --git a/mpeg4/src/doc/optimization.txt b/mpeg4/src/doc/optimization.txt new file mode 100644 index 0000000000000000000000000000000000000000..011e9e4f0f8dd40c3e94ce5896dde3862aa29091 --- /dev/null +++ b/mpeg4/src/doc/optimization.txt @@ -0,0 +1,158 @@ +optimization Tips (for libavcodec): + +What to optimize: +If you plan to do non-x86 architecture specific optimizations (SIMD normally), +then take a look in the i386/ directory, as most important functions are +already optimized for MMX. + +If you want to do x86 optimizations then you can either try to finetune the +stuff in the i386 directory or find some other functions in the C source to +optimize, but there aren't many left. + +Understanding these overoptimized functions: +As many functions tend to be a bit difficult to understand because +of optimizations, it can be hard to optimize them further, or write +architecture-specific versions. It is recommended to look at older +CVS versions of the interesting files (just use ViewCVS at +http://www1.mplayerhq.hu/cgi-bin/cvsweb.cgi/ffmpeg/?cvsroot=FFMpeg). +Alternatively, look into the other architecture-specific versions in +the i386/, ppc/, alpha/ subdirectories. Even if you don't exactly +comprehend the instructions, it could help understanding the functions +and how they can be optimized. + +NOTE: If you still don't understand some function, ask at our mailing list!!!
+(http://www1.mplayerhq.hu/mailman/listinfo/ffmpeg-devel) + + + +WTF is that function good for ....: +The primary purpose of that list is to avoid wasting time to optimize functions +which are rarely used + +put(_no_rnd)_pixels{,_x2,_y2,_xy2} + Used in motion compensation (en/decoding). + +avg_pixels{,_x2,_y2,_xy2} + Used in motion compensation of B-frames. + These are less important than the put*pixels functions. + +avg_no_rnd_pixels* + unused + +pix_abs16x16{,_x2,_y2,_xy2} + Used in motion estimation (encoding) with SAD. + +pix_abs8x8{,_x2,_y2,_xy2} + Used in motion estimation (encoding) with SAD of MPEG-4 4MV only. + These are less important than the pix_abs16x16* functions. + +put_mspel8_mc* / wmv2_mspel8* + Used only in WMV2. + it is not recommended that you waste your time with these, as WMV2 + is an ugly and relatively useless codec. + +mpeg4_qpel* / *qpel_mc* + Used in MPEG-4 qpel motion compensation (encoding & decoding). + The qpel8 functions are used only for 4mv, + the avg_* functions are used only for B-frames. + Optimizing them should have a significant impact on qpel + encoding & decoding. + +qpel{8,16}_mc??_old_c / *pixels{8,16}_l4 + Just used to work around a bug in an old libavcodec encoder version. + Don't optimize them. + +tpel_mc_func {put,avg}_tpel_pixels_tab + Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding. + +add_bytes/diff_bytes + For huffyuv only, optimize if you want a faster ffhuffyuv codec. + +get_pixels / diff_pixels + Used for encoding, easy. + +clear_blocks + easiest to optimize + +gmc + Used for MPEG-4 gmc. + Optimizing this should have a significant effect on the gmc decoding + speed but it's very likely impossible to write in SIMD. + +gmc1 + Used for chroma blocks in MPEG-4 gmc with 1 warp point + (there are 4 luma & 2 chroma blocks per macroblock, so + only 1/3 of the gmc blocks use this, the other 2/3 + use the normal put_pixel* code, but only if there is + just 1 warp point). 
+ Note: DivX5 gmc always uses just 1 warp point. + +pix_sum + Used for encoding. + +hadamard8_diff / sse / sad == pix_norm1 / dct_sad / quant_psnr / rd / bit + Specific compare functions used in encoding, it depends upon the + command line switches which of these are used. + Don't waste your time with dct_sad & quant_psnr, they aren't + really useful. + +put_pixels_clamped / add_pixels_clamped + Used for en/decoding in the IDCT, easy. + Note, some optimized IDCTs have the add/put clamped code included and + then put_pixels_clamped / add_pixels_clamped will be unused. + +idct/fdct + idct (encoding & decoding) + fdct (encoding) + difficult to optimize + +dct_quantize_trellis + Used for encoding with trellis quantization. + difficult to optimize + +dct_quantize + Used for encoding. + +dct_unquantize_mpeg1 + Used in MPEG-1 en/decoding. + +dct_unquantize_mpeg2 + Used in MPEG-2 en/decoding. + +dct_unquantize_h263 + Used in MPEG-4/H.263 en/decoding. + +FIXME remaining functions? +BTW, most of these functions are in dsputil.c/.h, some are in mpegvideo.c/.h. + + + +Alignment: +Some instructions on some architectures have strict alignment restrictions, +for example most SSE/SSE2 instructions on x86. 
+The minimum guaranteed alignment is written in the .h files, for example: + void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); + + + +Links: +http://www.aggregate.org/MAGIC/ + +x86-specific: +http://developer.intel.com/design/pentium4/manuals/248966.htm + +The IA-32 Intel Architecture Software Developer's Manual, Volume 2: +Instruction Set Reference +http://developer.intel.com/design/pentium4/manuals/245471.htm + +http://www.agner.org/assem/ + +AMD Athlon Processor x86 Code Optimization Guide: +http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/22007.pdf + +GCC asm links: +official doc but quite ugly +http://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html + +a bit old (note "+" is valid for input-output, even though the next disagrees) +http://www.cs.virginia.edu/~clc5q/gcc-inline-asm.pdf diff --git a/mpeg4/src/doc/texi2pod.pl b/mpeg4/src/doc/texi2pod.pl new file mode 100755 index 0000000000000000000000000000000000000000..c414ffcc6974a07fbacc72da8bd647665fc9a809 --- /dev/null +++ b/mpeg4/src/doc/texi2pod.pl @@ -0,0 +1,427 @@ +#! /usr/bin/perl -w + +# Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. + +# This file is part of GNU CC. + +# GNU CC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# GNU CC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with GNU CC; see the file COPYING. 
If not, write to +# the Free Software Foundation, 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA + +# This does trivial (and I mean _trivial_) conversion of Texinfo +# markup to Perl POD format. It's intended to be used to extract +# something suitable for a manpage from a Texinfo document. + +$output = 0; +$skipping = 0; +%sects = (); +$section = ""; +@icstack = (); +@endwstack = (); +@skstack = (); +@instack = (); +$shift = ""; +%defs = (); +$fnno = 1; +$inf = ""; +$ibase = ""; + +while ($_ = shift) { + if (/^-D(.*)$/) { + if ($1 ne "") { + $flag = $1; + } else { + $flag = shift; + } + $value = ""; + ($flag, $value) = ($flag =~ /^([^=]+)(?:=(.+))?/); + die "no flag specified for -D\n" + unless $flag ne ""; + die "flags may only contain letters, digits, hyphens, dashes and underscores\n" + unless $flag =~ /^[a-zA-Z0-9_-]+$/; + $defs{$flag} = $value; + } elsif (/^-/) { + usage(); + } else { + $in = $_, next unless defined $in; + $out = $_, next unless defined $out; + usage(); + } +} + +if (defined $in) { + $inf = gensym(); + open($inf, "<$in") or die "opening \"$in\": $!\n"; + $ibase = $1 if $in =~ m|^(.+)/[^/]+$|; +} else { + $inf = \*STDIN; +} + +if (defined $out) { + open(STDOUT, ">$out") or die "opening \"$out\": $!\n"; +} + +while(defined $inf) { +while(<$inf>) { + # Certain commands are discarded without further processing. + /^\@(?: + [a-z]+index # @*index: useful only in complete manual + |need # @need: useful only in printed manual + |(?:end\s+)?group # @group .. @end group: ditto + |page # @page: ditto + |node # @node: useful only in .info file + |(?:end\s+)?ifnottex # @ifnottex .. @end ifnottex: use contents + )\b/x and next; + + chomp; + + # Look for filename and title markers. + /^\@setfilename\s+([^.]+)/ and $fn = $1, next; + /^\@settitle\s+([^.]+)/ and $tl = postprocess($1), next; + + # Identify a man title but keep only the one we are interested in. 
+ /^\@c\s+man\s+title\s+([A-Za-z0-9-]+)\s+(.+)/ and do { + if (exists $defs{$1}) { + $fn = $1; + $tl = postprocess($2); + } + next; + }; + + # Look for blocks surrounded by @c man begin SECTION ... @c man end. + # This really oughta be @ifman ... @end ifman and the like, but such + # would require rev'ing all other Texinfo translators. + /^\@c\s+man\s+begin\s+([A-Z]+)\s+([A-Za-z0-9-]+)/ and do { + $output = 1 if exists $defs{$2}; + $sect = $1; + next; + }; + /^\@c\s+man\s+begin\s+([A-Z]+)/ and $sect = $1, $output = 1, next; + /^\@c\s+man\s+end/ and do { + $sects{$sect} = "" unless exists $sects{$sect}; + $sects{$sect} .= postprocess($section); + $section = ""; + $output = 0; + next; + }; + + # handle variables + /^\@set\s+([a-zA-Z0-9_-]+)\s*(.*)$/ and do { + $defs{$1} = $2; + next; + }; + /^\@clear\s+([a-zA-Z0-9_-]+)/ and do { + delete $defs{$1}; + next; + }; + + next unless $output; + + # Discard comments. (Can't do it above, because then we'd never see + # @c man lines.) + /^\@c\b/ and next; + + # End-block handler goes up here because it needs to operate even + # if we are skipping. + /^\@end\s+([a-z]+)/ and do { + # Ignore @end foo, where foo is not an operation which may + # cause us to skip, if we are presently skipping. 
+ my $ended = $1; + next if $skipping && $ended !~ /^(?:ifset|ifclear|ignore|menu|iftex)$/; + + die "\@end $ended without \@$ended at line $.\n" unless defined $endw; + die "\@$endw ended by \@end $ended at line $.\n" unless $ended eq $endw; + + $endw = pop @endwstack; + + if ($ended =~ /^(?:ifset|ifclear|ignore|menu|iftex)$/) { + $skipping = pop @skstack; + next; + } elsif ($ended =~ /^(?:example|smallexample|display)$/) { + $shift = ""; + $_ = ""; # need a paragraph break + } elsif ($ended =~ /^(?:itemize|enumerate|[fv]?table)$/) { + $_ = "\n=back\n"; + $ic = pop @icstack; + } else { + die "unknown command \@end $ended at line $.\n"; + } + }; + + # We must handle commands which can cause skipping even while we + # are skipping, otherwise we will not process nested conditionals + # correctly. + /^\@ifset\s+([a-zA-Z0-9_-]+)/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = "ifset"; + $skipping = 1 unless exists $defs{$1}; + next; + }; + + /^\@ifclear\s+([a-zA-Z0-9_-]+)/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = "ifclear"; + $skipping = 1 if exists $defs{$1}; + next; + }; + + /^\@(ignore|menu|iftex)\b/ and do { + push @endwstack, $endw; + push @skstack, $skipping; + $endw = $1; + $skipping = 1; + next; + }; + + next if $skipping; + + # Character entities. First the ones that can be replaced by raw text + # or discarded outright: + s/\@copyright\{\}/(c)/g; + s/\@dots\{\}/.../g; + s/\@enddots\{\}/..../g; + s/\@([.!? ])/$1/g; + s/\@[:-]//g; + s/\@bullet(?:\{\})?/*/g; + s/\@TeX\{\}/TeX/g; + s/\@pounds\{\}/\#/g; + s/\@minus(?:\{\})?/-/g; + s/\\,/,/g; + + # Now the ones that have to be replaced by special escapes + # (which will be turned back into text by unmunge()) + s/&/&/g; + s/\@\{/{/g; + s/\@\}/}/g; + s/\@\@/&at;/g; + + # Inside a verbatim block, handle @var specially. + if ($shift ne "") { + s/\@var\{([^\}]*)\}/<$1>/g; + } + + # POD doesn't interpret E<> inside a verbatim block. 
+ if ($shift eq "") { + s//>/g; + } else { + s//>/g; + } + + # Single line command handlers. + + /^\@include\s+(.+)$/ and do { + push @instack, $inf; + $inf = gensym(); + + # Try cwd and $ibase. + open($inf, "<" . $1) + or open($inf, "<" . $ibase . "/" . $1) + or die "cannot open $1 or $ibase/$1: $!\n"; + next; + }; + + /^\@(?:section|unnumbered|unnumberedsec|center)\s+(.+)$/ + and $_ = "\n=head2 $1\n"; + /^\@subsection\s+(.+)$/ + and $_ = "\n=head3 $1\n"; + + # Block command handlers: + /^\@itemize\s+(\@[a-z]+|\*|-)/ and do { + push @endwstack, $endw; + push @icstack, $ic; + $ic = $1; + $_ = "\n=over 4\n"; + $endw = "itemize"; + }; + + /^\@enumerate(?:\s+([a-zA-Z0-9]+))?/ and do { + push @endwstack, $endw; + push @icstack, $ic; + if (defined $1) { + $ic = $1 . "."; + } else { + $ic = "1."; + } + $_ = "\n=over 4\n"; + $endw = "enumerate"; + }; + + /^\@([fv]?table)\s+(\@[a-z]+)/ and do { + push @endwstack, $endw; + push @icstack, $ic; + $endw = $1; + $ic = $2; + $ic =~ s/\@(?:samp|strong|key|gcctabopt|option|env)/B/; + $ic =~ s/\@(?:code|kbd)/C/; + $ic =~ s/\@(?:dfn|var|emph|cite|i)/I/; + $ic =~ s/\@(?:file)/F/; + $_ = "\n=over 4\n"; + }; + + /^\@((?:small)?example|display)/ and do { + push @endwstack, $endw; + $endw = $1; + $shift = "\t"; + $_ = ""; # need a paragraph break + }; + + /^\@itemx?\s*(.+)?$/ and do { + if (defined $1) { + # Entity escapes prevent munging by the <> processing below. + $_ = "\n=item $ic\<$1\>\n"; + } else { + $_ = "\n=item $ic\n"; + $ic =~ y/A-Ya-y/B-Zb-z/; + $ic =~ s/(\d+)/$1 + 1/eg; + } + }; + + $section .= $shift.$_."\n"; +} +# End of current file. 
+close($inf); +$inf = pop @instack; +} + +die "No filename or title\n" unless defined $fn && defined $tl; + +$sects{NAME} = "$fn \- $tl\n"; +$sects{FOOTNOTES} .= "=back\n" if exists $sects{FOOTNOTES}; + +for $sect (qw(NAME SYNOPSIS DESCRIPTION OPTIONS EXAMPLES ENVIRONMENT FILES + BUGS NOTES FOOTNOTES SEEALSO AUTHOR COPYRIGHT)) { + if(exists $sects{$sect}) { + $head = $sect; + $head =~ s/SEEALSO/SEE ALSO/; + print "=head1 $head\n\n"; + print scalar unmunge ($sects{$sect}); + print "\n"; + } +} + +sub usage +{ + die "usage: $0 [-D toggle...] [infile [outfile]]\n"; +} + +sub postprocess +{ + local $_ = $_[0]; + + # @value{foo} is replaced by whatever 'foo' is defined as. + while (m/(\@value\{([a-zA-Z0-9_-]+)\})/g) { + if (! exists $defs{$2}) { + print STDERR "Option $2 not defined\n"; + s/\Q$1\E//; + } else { + $value = $defs{$2}; + s/\Q$1\E/$value/; + } + } + + # Formatting commands. + # Temporary escape for @r. + s/\@r\{([^\}]*)\}/R<$1>/g; + s/\@(?:dfn|var|emph|cite|i)\{([^\}]*)\}/I<$1>/g; + s/\@(?:code|kbd)\{([^\}]*)\}/C<$1>/g; + s/\@(?:gccoptlist|samp|strong|key|option|env|command|b)\{([^\}]*)\}/B<$1>/g; + s/\@sc\{([^\}]*)\}/\U$1/g; + s/\@file\{([^\}]*)\}/F<$1>/g; + s/\@w\{([^\}]*)\}/S<$1>/g; + s/\@(?:dmn|math)\{([^\}]*)\}/$1/g; + + # Cross references are thrown away, as are @noindent and @refill. + # (@noindent is impossible in .pod, and @refill is unnecessary.) + # @* is also impossible in .pod; we discard it and any newline that + # follows it. Similarly, our macro @gol must be discarded. + + s/\(?\@xref\{(?:[^\}]*)\}(?:[^.<]|(?:<[^<>]*>))*\.\)?//g; + s/\s+\(\@pxref\{(?:[^\}]*)\}\)//g; + s/;\s+\@pxref\{(?:[^\}]*)\}//g; + s/\@noindent\s*//g; + s/\@refill//g; + s/\@gol//g; + s/\@\*\s*\n?//g; + + # @uref can take one, two, or three arguments, with different + # semantics each time. @url and @email are just like @uref with + # one argument, for our purposes. 
+ s/\@(?:uref|url|email)\{([^\},]*)\}/<B<$1>>/g; + s/\@uref\{([^\},]*),([^\},]*)\}/$2 (C<$1>)/g; + s/\@uref\{([^\},]*),([^\},]*),([^\},]*)\}/$3/g; + + # Turn B blah> into B I B to + # match Texinfo semantics of @emph inside @samp. Also handle @r + # inside bold. + s/<//g; + 1 while s/B<((?:[^<>]|I<[^<>]*>)*)R<([^>]*)>/B<$1>${2}B]*)I<([^>]+)>/B<$1>I<$2>B]*)B<([^>]+)>/I<$1>B<$2>I//g; + s/([BI])<(\s+)([^>]+)>/$2$1<$3>/g; + s/([BI])<([^>]+?)(\s+)>/$1<$2>$3/g; + + # Extract footnotes. This has to be done after all other + # processing because otherwise the regexp will choke on formatting + # inside @footnote. + while (/\@footnote/g) { + s/\@footnote\{([^\}]+)\}/[$fnno]/; + add_footnote($1, $fnno); + $fnno++; + } + + return $_; +} + +sub unmunge +{ + # Replace escaped symbols with their equivalents. + local $_ = $_[0]; + + s/</E/g; + s/>/E/g; + s/{/\{/g; + s/}/\}/g; + s/&at;/\@/g; + s/&/&/g; + return $_; +} + +sub add_footnote +{ + unless (exists $sects{FOOTNOTES}) { + $sects{FOOTNOTES} = "\n=over 4\n\n"; + } + + $sects{FOOTNOTES} .= "=item $fnno.\n\n"; $fnno++; + $sects{FOOTNOTES} .= $_[0]; + $sects{FOOTNOTES} .= "\n\n"; +} + +# stolen from Symbol.pm +{ + my $genseq = 0; + sub gensym + { + my $name = "GEN" . $genseq++; + my $ref = \*{$name}; + delete $::{$name}; + return $ref; + } +} diff --git a/mpeg4/src/ffinstall.nsi b/mpeg4/src/ffinstall.nsi new file mode 100644 index 0000000000000000000000000000000000000000..f483b0174f659230ce107140aff95f8a2528fc29 --- /dev/null +++ b/mpeg4/src/ffinstall.nsi @@ -0,0 +1,75 @@ +;NSIS Script For FFmpeg + +;Title Of Your Application +Name "FFmpeg" +CompletedText "FFmpeg install completed! Enjoy your meal!" + +; do a CRC check +CRCCheck On + +; output file name +OutFile "FFinstall.exe" + +; license page introduction +LicenseText "You must agree to this license before installing." 
+; license data +LicenseData ".\COPYING" + +; the default installation directory +InstallDir "$PROGRAMFILES\FFmpeg" + +;The text to prompt the user to enter a directory +DirText "Please select the folder below" + +Section "Install" + ;Install Files + SetOutPath $INSTDIR + SetCompress Auto + SetOverwrite IfNewer + File ".\ffmpeg.exe" + File ".\SDL.dll" + File ".\ffplay.exe" + File ".\COPYING" + File ".\CREDITS" + + ; documentation + SetOutPath $INSTDIR\doc + File ".\doc\faq.html" + File ".\doc\ffmpeg-doc.html" + File ".\doc\ffplay-doc.html" + + ; Write the uninstall keys for Windows + WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\FFmpeg" "DisplayName" "FFmpeg (remove only)" + WriteRegStr HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\FFmpeg" "UninstallString" "$INSTDIR\Uninst.exe" +WriteUninstaller "Uninst.exe" +SectionEnd + +Section "Shortcuts" + ;Add Shortcuts +SectionEnd + +UninstallText "This will uninstall FFmpeg from your system" + +Section Uninstall + ; delete files + Delete "$INSTDIR\ffmpeg.exe" + Delete "$INSTDIR\SDL.dll" + Delete "$INSTDIR\ffplay.exe" + Delete "$INSTDIR\COPYING" + Delete "$INSTDIR\CREDITS" + + ; delete documentation (installed under $INSTDIR\doc by the Install section) + Delete "$INSTDIR\doc\faq.html" + Delete "$INSTDIR\doc\ffmpeg-doc.html" + Delete "$INSTDIR\doc\ffplay-doc.html" + + RMDir /r $INSTDIR\doc + + ; delete uninstaller and uninstall registry entries + Delete "$INSTDIR\Uninst.exe" + DeleteRegKey HKEY_LOCAL_MACHINE "SOFTWARE\FFmpeg" + DeleteRegKey HKEY_LOCAL_MACHINE "SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall\FFmpeg" + RMDir "$INSTDIR" +SectionEnd + diff --git a/mpeg4/src/ffmpeg.c b/mpeg4/src/ffmpeg.c new file mode 100644 index 0000000000000000000000000000000000000000..d03a538484e393e0ddd5e10dd93eb2e383bc2b6e --- /dev/null +++ b/mpeg4/src/ffmpeg.c @@ -0,0 +1,4221 @@ +/* + * FFmpeg main + * Copyright (c) 2000-2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#define HAVE_AV_CONFIG_H +#include +#include "avformat.h" +#include "framehook.h" +#include "dsputil.h" +#include "opt.h" + +#ifndef CONFIG_WIN32 +#include +#include +#include +#include +#include +#include +#include +#endif +#ifdef CONFIG_OS2 +#include +#include +#include +#endif +#undef time //needed because HAVE_AV_CONFIG_H is defined on top +#include + +#include "cmdutils.h" + +#undef NDEBUG +#include + +#if !defined(INFINITY) && defined(HUGE_VAL) +#define INFINITY HUGE_VAL +#endif + +/* select an input stream for an output stream */ +typedef struct AVStreamMap { + int file_index; + int stream_index; + int sync_file_index; + int sync_stream_index; +} AVStreamMap; + +/** select an input file for an output file */ +typedef struct AVMetaDataMap { + int out_file; + int in_file; +} AVMetaDataMap; + +extern const OptionDef options[]; + +static void show_help(void); +static void show_license(void); +static int opt_default(const char *opt, const char *arg); + +#define MAX_FILES 20 + +static AVFormatContext *input_files[MAX_FILES]; +static int64_t input_files_ts_offset[MAX_FILES]; +static int nb_input_files = 0; + +static AVFormatContext *output_files[MAX_FILES]; +static int nb_output_files = 0; + +static AVStreamMap stream_maps[MAX_FILES]; +static int nb_stream_maps; + +static AVMetaDataMap meta_data_maps[MAX_FILES]; +static int 
nb_meta_data_maps; + +static AVInputFormat *file_iformat; +static AVOutputFormat *file_oformat; +static AVImageFormat *image_format; +static int frame_width = 0; +static int frame_height = 0; +static float frame_aspect_ratio = 0; +static enum PixelFormat frame_pix_fmt = PIX_FMT_NONE; +static int frame_padtop = 0; +static int frame_padbottom = 0; +static int frame_padleft = 0; +static int frame_padright = 0; +static int padcolor[3] = {16,128,128}; /* default to black */ +static int frame_topBand = 0; +static int frame_bottomBand = 0; +static int frame_leftBand = 0; +static int frame_rightBand = 0; +static int max_frames[4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX}; +static int frame_rate = 25; +static int frame_rate_base = 1; +static int video_bit_rate = 200*1000; +static int video_bit_rate_tolerance = 4000*1000; +static float video_qscale = 0; +static int video_qmin = 2; +static int video_qmax = 31; +static int video_lmin = 2*FF_QP2LAMBDA; +static int video_lmax = 31*FF_QP2LAMBDA; +static int video_mb_lmin = 2*FF_QP2LAMBDA; +static int video_mb_lmax = 31*FF_QP2LAMBDA; +static int video_qdiff = 3; +static float video_qblur = 0.5; +static float video_qsquish = 0.0; +static float video_qcomp = 0.5; +static uint16_t *intra_matrix = NULL; +static uint16_t *inter_matrix = NULL; +#if 0 //experimental, (can be removed) +static float video_rc_qsquish=1.0; +static float video_rc_qmod_amp=0; +static int video_rc_qmod_freq=0; +#endif +static char *video_rc_override_string=NULL; +static char *video_rc_eq="tex^qComp"; +static int video_rc_buffer_size=0; +static float video_rc_buffer_aggressivity=1.0; +static int video_rc_max_rate=0; +static int video_rc_min_rate=0; +static float video_rc_initial_cplx=0; +static float video_b_qfactor = 1.25; +static float video_b_qoffset = 1.25; +static float video_i_qfactor = -0.8; +static float video_i_qoffset = 0.0; +static int video_intra_quant_bias= FF_DEFAULT_QUANT_BIAS; +static int video_inter_quant_bias= FF_DEFAULT_QUANT_BIAS; +static int 
me_method = ME_EPZS; +static int video_disable = 0; +static int video_discard = 0; +static int video_codec_id = CODEC_ID_NONE; +static int video_codec_tag = 0; +static int same_quality = 0; +static int b_frames = 0; +static int pre_me = 0; +static int do_deinterlace = 0; +static int workaround_bugs = FF_BUG_AUTODETECT; +static int packet_size = 0; +static int error_rate = 0; +static int strict = 0; +static int top_field_first = -1; +static int sc_threshold = 0; +static int me_threshold = 0; +static int mb_threshold = 0; +static int intra_dc_precision = 8; +static int me_penalty_compensation= 256; +static int frame_skip_threshold= 0; +static int frame_skip_factor= 0; +static int frame_skip_exp= 0; +extern int loop_input; /* currently a hack */ +static int loop_output = AVFMT_NOOUTPUTLOOP; +static int genpts = 0; +static int qp_hist = 0; + +static int gop_size = 12; +static int intra_only = 0; +static int audio_sample_rate = 44100; +static int audio_bit_rate = 64000; +#define QSCALE_NONE -99999 +static float audio_qscale = QSCALE_NONE; +static int audio_disable = 0; +static int audio_channels = 1; +static int audio_codec_id = CODEC_ID_NONE; +static int audio_codec_tag = 0; +static char *audio_language = NULL; + +static int subtitle_codec_id = CODEC_ID_NONE; +static char *subtitle_language = NULL; + +static int mux_rate= 0; +static int mux_packet_size= 0; +static float mux_preload= 0.5; +static float mux_max_delay= 0.7; + +static int64_t recording_time = 0; +static int64_t start_time = 0; +static int64_t rec_timestamp = 0; +static int64_t input_ts_offset = 0; +static int file_overwrite = 0; +static char *str_title = NULL; +static char *str_author = NULL; +static char *str_copyright = NULL; +static char *str_comment = NULL; +static int do_benchmark = 0; +static int do_hex_dump = 0; +static int do_pkt_dump = 0; +static int do_psnr = 0; +static int do_vstats = 0; +static int do_pass = 0; +static char *pass_logfilename = NULL; +static int audio_stream_copy = 0; +static 
int video_stream_copy = 0; +static int subtitle_stream_copy = 0; +static int video_sync_method= 1; +static int audio_sync_method= 0; +static int copy_ts= 0; +static int opt_shortest = 0; // +static int video_global_header = 0; + +static int rate_emu = 0; + +#ifdef CONFIG_BKTR +static char *video_grab_format = "bktr"; +#else +#ifdef CONFIG_VIDEO4LINUX2 +static char *video_grab_format = "video4linux2"; +#else +static char *video_grab_format = "video4linux"; +#endif +#endif +static char *video_device = NULL; +static char *grab_device = NULL; +static int video_channel = 0; +static char *video_standard = "ntsc"; + +static char *audio_grab_format = "audio_device"; +static char *audio_device = NULL; +static int audio_volume = 256; + +static int using_stdin = 0; +static int using_vhook = 0; +static int verbose = 1; +static int thread_count= 1; +static int q_pressed = 0; +static int me_range = 0; +static int64_t video_size = 0; +static int64_t audio_size = 0; +static int64_t extra_size = 0; +static int nb_frames_dup = 0; +static int nb_frames_drop = 0; +static int input_sync; +static int limit_filesize = 0; // + +static int pgmyuv_compatibility_hack=0; +static int dts_delta_threshold = 10; + +const char **opt_names=NULL; +int opt_name_count=0; +AVCodecContext *avctx_opts; + + +#define DEFAULT_PASS_LOGFILENAME "ffmpeg2pass" + +struct AVInputStream; + +typedef struct AVOutputStream { + int file_index; /* file index */ + int index; /* stream index in the output file */ + int source_index; /* AVInputStream index */ + AVStream *st; /* stream in the output file */ + int encoding_needed; /* true if encoding needed for this stream */ + int frame_number; + /* input pts and corresponding output pts + for A/V sync */ + //double sync_ipts; /* dts from the AVPacket of the demuxer in second units */ + struct AVInputStream *sync_ist; /* input stream to sync against */ + int64_t sync_opts; /* output frame counter, could be changed to some true timestamp */ //FIXME look at frame_number + /* 
video only */ + int video_resample; + AVFrame pict_tmp; /* temporary image for resampling */ + ImgReSampleContext *img_resample_ctx; /* for image resampling */ + + int video_crop; + int topBand; /* cropping area sizes */ + int leftBand; + + int video_pad; + int padtop; /* padding area sizes */ + int padbottom; + int padleft; + int padright; + + /* audio only */ + int audio_resample; + ReSampleContext *resample; /* for audio resampling */ + FifoBuffer fifo; /* for compression: one audio fifo per codec */ + FILE *logfile; +} AVOutputStream; + +typedef struct AVInputStream { + int file_index; + int index; + AVStream *st; + int discard; /* true if stream data should be discarded */ + int decoding_needed; /* true if the packets must be decoded in 'raw_fifo' */ + int64_t sample_index; /* current sample */ + + int64_t start; /* time when read started */ + unsigned long frame; /* current frame */ + int64_t next_pts; /* synthetic pts for cases where pkt.pts + is not defined */ + int64_t pts; /* current pts */ + int is_start; /* is 1 at the start and after a discontinuity */ +} AVInputStream; + +typedef struct AVInputFile { + int eof_reached; /* true if eof reached */ + int ist_index; /* index of first stream in ist_table */ + int buffer_size; /* current total buffer size */ + int buffer_size_max; /* buffer size at which we consider we can stop + buffering */ + int nb_streams; /* nb streams we are aware of */ +} AVInputFile; + +#ifndef CONFIG_WIN32 + +/* init terminal so that we can grab keys */ +static struct termios oldtty; + +static void term_exit(void) +{ + tcsetattr (0, TCSANOW, &oldtty); +} + +static volatile sig_atomic_t received_sigterm = 0; + +static void +sigterm_handler(int sig) +{ + received_sigterm = sig; + term_exit(); +} + +static void term_init(void) +{ + struct termios tty; + + tcgetattr (0, &tty); + oldtty = tty; + + tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP + |INLCR|IGNCR|ICRNL|IXON); + tty.c_oflag |= OPOST; + tty.c_lflag &= 
~(ECHO|ECHONL|ICANON|IEXTEN); + tty.c_cflag &= ~(CSIZE|PARENB); + tty.c_cflag |= CS8; + tty.c_cc[VMIN] = 1; + tty.c_cc[VTIME] = 0; + + tcsetattr (0, TCSANOW, &tty); + + signal(SIGINT , sigterm_handler); /* Interrupt (ANSI). */ + signal(SIGQUIT, sigterm_handler); /* Quit (POSIX). */ + signal(SIGTERM, sigterm_handler); /* Termination (ANSI). */ + /* + register a function to be called at normal program termination + */ + atexit(term_exit); +#ifdef CONFIG_BEOS_NETSERVER + fcntl(0, F_SETFL, fcntl(0, F_GETFL) | O_NONBLOCK); +#endif +} + +/* read a key without blocking */ +static int read_key(void) +{ + int n = 1; + unsigned char ch; +#ifndef CONFIG_BEOS_NETSERVER + struct timeval tv; + fd_set rfds; + + FD_ZERO(&rfds); + FD_SET(0, &rfds); + tv.tv_sec = 0; + tv.tv_usec = 0; + n = select(1, &rfds, NULL, NULL, &tv); +#endif + if (n > 0) { + n = read(0, &ch, 1); + if (n == 1) + return ch; + + return n; + } + return -1; +} + +static int decode_interrupt_cb(void) +{ + return q_pressed || (q_pressed = read_key() == 'q'); +} + +#else + +static volatile int received_sigterm = 0; + +/* no interactive support */ +static void term_exit(void) +{ +} + +static void term_init(void) +{ +} + +static int read_key(void) +{ + return 0; +} + +#endif + +static int read_ffserver_streams(AVFormatContext *s, const char *filename) +{ + int i, err; + AVFormatContext *ic; + + err = av_open_input_file(&ic, filename, NULL, FFM_PACKET_SIZE, NULL); + if (err < 0) + return err; + /* copy stream format: duplicate each stream (and its codec context) of ic into s */ + s->nb_streams = ic->nb_streams; + for(i=0;i<ic->nb_streams;i++) { + AVStream *st; + + // FIXME: a more elegant solution is needed + st = av_mallocz(sizeof(AVStream)); + memcpy(st, ic->streams[i], sizeof(AVStream)); + st->codec = avcodec_alloc_context(); + memcpy(st->codec, ic->streams[i]->codec, sizeof(AVCodecContext)); + s->streams[i] = st; + } + + av_close_input_file(ic); + return 0; +} + +static double +get_sync_ipts(const AVOutputStream *ost) +{ + const AVInputStream *ist = ost->sync_ist; + return 
(double)(ist->pts + input_files_ts_offset[ist->file_index] - start_time)/AV_TIME_BASE; +} + +#define MAX_AUDIO_PACKET_SIZE (128 * 1024) + +static void do_audio_out(AVFormatContext *s, + AVOutputStream *ost, + AVInputStream *ist, + unsigned char *buf, int size) +{ + uint8_t *buftmp; + static uint8_t *audio_buf = NULL; + static uint8_t *audio_out = NULL; + const int audio_out_size= 4*MAX_AUDIO_PACKET_SIZE; + + int size_out, frame_bytes, ret; + AVCodecContext *enc= ost->st->codec; + + /* SC: dynamic allocation of buffers */ + if (!audio_buf) + audio_buf = av_malloc(2*MAX_AUDIO_PACKET_SIZE); + if (!audio_out) + audio_out = av_malloc(audio_out_size); + if (!audio_buf || !audio_out) + return; /* Should signal an error ! */ + + if(audio_sync_method){ + double delta = get_sync_ipts(ost) * enc->sample_rate - ost->sync_opts + - fifo_size(&ost->fifo, ost->fifo.rptr)/(ost->st->codec->channels * 2); + double idelta= delta*ist->st->codec->sample_rate / enc->sample_rate; + int byte_delta= ((int)idelta)*2*ist->st->codec->channels; + + //FIXME resample delay + if(fabs(delta) > 50){ + if(ist->is_start){ + if(byte_delta < 0){ + byte_delta= FFMAX(byte_delta, -size); + size += byte_delta; + buf -= byte_delta; + if(verbose > 2) + fprintf(stderr, "discarding %d audio samples\n", (int)-delta); + if(!size) + return; + ist->is_start=0; + }else{ + static uint8_t *input_tmp= NULL; + input_tmp= av_realloc(input_tmp, byte_delta + size); + + if(byte_delta + size <= MAX_AUDIO_PACKET_SIZE) + ist->is_start=0; + else + byte_delta= MAX_AUDIO_PACKET_SIZE - size; + + memset(input_tmp, 0, byte_delta); + memcpy(input_tmp + byte_delta, buf, size); + buf= input_tmp; + size += byte_delta; + if(verbose > 2) + fprintf(stderr, "adding %d audio samples of silence\n", (int)delta); + } + }else if(audio_sync_method>1){ + int comp= clip(delta, -audio_sync_method, audio_sync_method); + assert(ost->audio_resample); + if(verbose > 2) + fprintf(stderr, "compensating audio timestamp drift:%f compensation:%d in:%d\n", 
delta, comp, enc->sample_rate); +// fprintf(stderr, "drift:%f len:%d opts:%lld ipts:%lld fifo:%d\n", delta, -1, ost->sync_opts, (int64_t)(get_sync_ipts(ost) * enc->sample_rate), fifo_size(&ost->fifo, ost->fifo.rptr)/(ost->st->codec->channels * 2)); + av_resample_compensate(*(struct AVResampleContext**)ost->resample, comp, enc->sample_rate); + } + } + }else + ost->sync_opts= lrintf(get_sync_ipts(ost) * enc->sample_rate) + - fifo_size(&ost->fifo, ost->fifo.rptr)/(ost->st->codec->channels * 2); //FIXME wrong + + if (ost->audio_resample) { + buftmp = audio_buf; + size_out = audio_resample(ost->resample, + (short *)buftmp, (short *)buf, + size / (ist->st->codec->channels * 2)); + size_out = size_out * enc->channels * 2; + } else { + buftmp = buf; + size_out = size; + } + + /* now encode as many frames as possible */ + if (enc->frame_size > 1) { + /* output resampled raw samples */ + fifo_write(&ost->fifo, buftmp, size_out, + &ost->fifo.wptr); + + frame_bytes = enc->frame_size * 2 * enc->channels; + + while (fifo_read(&ost->fifo, audio_buf, frame_bytes, + &ost->fifo.rptr) == 0) { + AVPacket pkt; + av_init_packet(&pkt); + + ret = avcodec_encode_audio(enc, audio_out, audio_out_size, + (short *)audio_buf); + audio_size += ret; + pkt.stream_index= ost->index; + pkt.data= audio_out; + pkt.size= ret; + if(enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) + pkt.pts= av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base); + pkt.flags |= PKT_FLAG_KEY; + av_interleaved_write_frame(s, &pkt); + + ost->sync_opts += enc->frame_size; + } + } else { + AVPacket pkt; + av_init_packet(&pkt); + + ost->sync_opts += size_out / (2 * enc->channels); + + /* output a pcm frame */ + /* XXX: change encoding codec API to avoid this ? 
*/ + switch(enc->codec->id) { + case CODEC_ID_PCM_S32LE: + case CODEC_ID_PCM_S32BE: + case CODEC_ID_PCM_U32LE: + case CODEC_ID_PCM_U32BE: + size_out = size_out << 1; + break; + case CODEC_ID_PCM_S24LE: + case CODEC_ID_PCM_S24BE: + case CODEC_ID_PCM_U24LE: + case CODEC_ID_PCM_U24BE: + case CODEC_ID_PCM_S24DAUD: + size_out = size_out / 2 * 3; + break; + case CODEC_ID_PCM_S16LE: + case CODEC_ID_PCM_S16BE: + case CODEC_ID_PCM_U16LE: + case CODEC_ID_PCM_U16BE: + break; + default: + size_out = size_out >> 1; + break; + } + ret = avcodec_encode_audio(enc, audio_out, size_out, + (short *)buftmp); + audio_size += ret; + pkt.stream_index= ost->index; + pkt.data= audio_out; + pkt.size= ret; + if(enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) + pkt.pts= av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base); + pkt.flags |= PKT_FLAG_KEY; + av_interleaved_write_frame(s, &pkt); + } +} + +static void pre_process_video_frame(AVInputStream *ist, AVPicture *picture, void **bufp) +{ + AVCodecContext *dec; + AVPicture *picture2; + AVPicture picture_tmp; + uint8_t *buf = 0; + + dec = ist->st->codec; + + /* deinterlace : must be done before any resize */ + if (do_deinterlace || using_vhook) { + int size; + + /* create temporary picture */ + size = avpicture_get_size(dec->pix_fmt, dec->width, dec->height); + buf = av_malloc(size); + if (!buf) + return; + + picture2 = &picture_tmp; + avpicture_fill(picture2, buf, dec->pix_fmt, dec->width, dec->height); + + if (do_deinterlace){ + if(avpicture_deinterlace(picture2, picture, + dec->pix_fmt, dec->width, dec->height) < 0) { + /* if error, do not deinterlace */ + av_free(buf); + buf = NULL; + picture2 = picture; + } + } else { + img_copy(picture2, picture, dec->pix_fmt, dec->width, dec->height); + } + } else { + picture2 = picture; + } + + frame_hook_process(picture2, dec->pix_fmt, dec->width, dec->height); + + if (picture != picture2) + *picture = *picture2; + *bufp = buf; +} + +/* we begin to correct av delay at 
this threshold */ +#define AV_DELAY_MAX 0.100 + /* Encode the subtitle sub and write the resulting packet(s) to output stream ost of muxer s. pts is the subtitle timestamp in the time base of the input stream ist; AV_NOPTS_VALUE is rejected with a message on stderr. The encoded data are placed in a 65536-byte buffer that is allocated on the first call and reused afterwards. DVB subtitles are written as two packets, all other codecs as one. NOTE(review): neither the av_malloc() result nor the avcodec_encode_subtitle() return value is checked before being used as packet data and size. */ +static void do_subtitle_out(AVFormatContext *s, + AVOutputStream *ost, + AVInputStream *ist, + AVSubtitle *sub, + int64_t pts) +{ + static uint8_t *subtitle_out = NULL; + int subtitle_out_max_size = 65536; + int subtitle_out_size, nb, i; + AVCodecContext *enc; + AVPacket pkt; + + if (pts == AV_NOPTS_VALUE) { + fprintf(stderr, "Subtitle packets must have a pts\n"); + return; + } + + enc = ost->st->codec; + + /* static buffer: allocated once, shared by all later calls */ if (!subtitle_out) { + subtitle_out = av_malloc(subtitle_out_max_size); + } + + /* Note: DVB subtitle need one packet to draw them and one other + packet to clear them */ + /* XXX: signal it in the codec context ? */ + if (enc->codec_id == CODEC_ID_DVB_SUBTITLE) + nb = 2; + else + nb = 1; + + for(i = 0; i < nb; i++) { + subtitle_out_size = avcodec_encode_subtitle(enc, subtitle_out, + subtitle_out_max_size, sub); + + av_init_packet(&pkt); + pkt.stream_index = ost->index; + pkt.data = subtitle_out; + pkt.size = subtitle_out_size; + /* input pts -> AV_TIME_BASE units, plus the per-input-file timestamp offset, -> output stream time base */ pkt.pts = av_rescale_q(av_rescale_q(pts, ist->st->time_base, AV_TIME_BASE_Q) + input_files_ts_offset[ist->file_index], AV_TIME_BASE_Q, ost->st->time_base); + if (enc->codec_id == CODEC_ID_DVB_SUBTITLE) { + /* XXX: the pts correction is handled here. 
Maybe handling + it in the codec would be better */ + /* first packet is stamped with the display start, second with the display end; NOTE(review): the factor 90 presumably converts milliseconds to a 90 kHz clock - confirm */ if (i == 0) + pkt.pts += 90 * sub->start_display_time; + else + pkt.pts += 90 * sub->end_display_time; + } + av_interleaved_write_frame(s, &pkt); + } +} + /* Output buffer handed to the encoder calls in this file; 256 KiB unless enlarged elsewhere before allocation. */ +static int bit_buffer_size= 1024*256; +static uint8_t *bit_buffer= NULL; + /* Send one decoded picture to video output stream ost of muxer s. The function first decides how many copies to emit (0 = drop, more than 1 = duplicate) from the video sync state and the max_frames limit, then converts the pixel format, crops, resamples and pads as configured in ost, and finally either writes the picture as a raw AVPicture (muxers flagged AVFMT_RAWPICTURE) or encodes it into bit_buffer and writes the resulting packet. *frame_size is reset to 0 on entry and set to the encoded size when the encoder path writes a packet. */ +static void do_video_out(AVFormatContext *s, + AVOutputStream *ost, + AVInputStream *ist, + AVFrame *in_picture, + int *frame_size) +{ + int nb_frames, i, ret; + AVFrame *final_picture, *formatted_picture, *resampling_dst, *padding_src; + AVFrame picture_format_temp, picture_crop_temp, picture_pad_temp; + uint8_t *buf = NULL, *buf1 = NULL; + AVCodecContext *enc, *dec; + enum PixelFormat target_pixfmt; + + avcodec_get_frame_defaults(&picture_format_temp); + avcodec_get_frame_defaults(&picture_crop_temp); + avcodec_get_frame_defaults(&picture_pad_temp); + + enc = ost->st->codec; + dec = ist->st->codec; + + /* by default, we output a single frame */ + nb_frames = 1; + + *frame_size = 0; + + if(video_sync_method){ + double vdelta; + /* vdelta: distance, in output frame periods, between the input timestamp reported by get_sync_ipts() and the next output frame index */ vdelta = get_sync_ipts(ost) / av_q2d(enc->time_base) - ost->sync_opts; + //FIXME set to 0.5 after we fix some dts/pts bugs like in avidec.c + if (vdelta < -1.1) + nb_frames = 0; + else if (vdelta > 1.1) + nb_frames = lrintf(vdelta); +//fprintf(stderr, "vdelta:%f, ost->sync_opts:%lld, ost->sync_ipts:%f nb_frames:%d\n", vdelta, ost->sync_opts, ost->sync_ipts, nb_frames); + if (nb_frames == 0){ + ++nb_frames_drop; + if (verbose>2) + fprintf(stderr, "*** drop!\n"); + }else if (nb_frames > 1) { + /* NOTE(review): the counter grows by nb_frames while the message below reports nb_frames-1 duplicates - confirm which is intended */ nb_frames_dup += nb_frames; + if (verbose>2) + fprintf(stderr, "*** %d dup!\n", nb_frames-1); + } + }else + ost->sync_opts= lrintf(get_sync_ipts(ost) / av_q2d(enc->time_base)); + + /* never exceed the configured maximum number of video frames */ nb_frames= FFMIN(nb_frames, max_frames[CODEC_TYPE_VIDEO] - ost->frame_number); + if (nb_frames <= 0) + return; + + /* convert pixel format if needed */ + target_pixfmt = ost->video_resample ? 
PIX_FMT_YUV420P : enc->pix_fmt; + /* step 1: bring the decoded picture to target_pixfmt (YUV420P when resampling, otherwise the encoder format) */ if (dec->pix_fmt != target_pixfmt) { + int size; + + /* create temporary picture */ + size = avpicture_get_size(target_pixfmt, dec->width, dec->height); + buf = av_malloc(size); + if (!buf) + return; + formatted_picture = &picture_format_temp; + avpicture_fill((AVPicture*)formatted_picture, buf, target_pixfmt, dec->width, dec->height); + + if (img_convert((AVPicture*)formatted_picture, target_pixfmt, + (AVPicture *)in_picture, dec->pix_fmt, + dec->width, dec->height) < 0) { + + if (verbose >= 0) + fprintf(stderr, "pixel format conversion not handled\n"); + + goto the_end; + } + } else { + formatted_picture = in_picture; + } + + /* step 2: crop; from here on formatted_picture refers to picture_crop_temp */ if (ost->video_crop) { + if (img_crop((AVPicture *)&picture_crop_temp, (AVPicture *)formatted_picture, target_pixfmt, ost->topBand, ost->leftBand) < 0) { + av_log(NULL, AV_LOG_ERROR, "error cropping picture\n"); + goto the_end; + } + formatted_picture = &picture_crop_temp; + } + + /* defaults when neither padding nor resampling applies: the formatted picture is encoded directly */ final_picture = formatted_picture; + padding_src = formatted_picture; + resampling_dst = &ost->pict_tmp; + if (ost->video_pad) { + final_picture = &ost->pict_tmp; + if (ost->video_resample) { + /* resample into the area of pict_tmp that starts at (padtop, padleft); img_crop() presumably sets picture_pad_temp up as a view of that area - confirm */ if (img_crop((AVPicture *)&picture_pad_temp, (AVPicture *)final_picture, target_pixfmt, ost->padtop, ost->padleft) < 0) { + av_log(NULL, AV_LOG_ERROR, "error padding picture\n"); + goto the_end; + } + resampling_dst = &picture_pad_temp; + } + } + + /* XXX: resampling could be done before raw format conversion in + some cases to go faster */ + /* XXX: only works for YUV420P */ + if (ost->video_resample) { + /* the resampler writes straight into its destination, so the padding step has no separate source picture */ padding_src = NULL; + final_picture = &ost->pict_tmp; + img_resample(ost->img_resample_ctx, (AVPicture *)resampling_dst, (AVPicture*)formatted_picture); + } + + /* target_pixfmt differs from enc->pix_fmt only when video_resample is set (see its assignment), so this is the conversion of the resampled picture to the encoder format */ if (enc->pix_fmt != target_pixfmt) { + int size; + + /* release the buffer of the first conversion, if any, before buf is reused */ av_free(buf); + /* create temporary picture */ + size = avpicture_get_size(enc->pix_fmt, enc->width, enc->height); + buf = av_malloc(size); + if (!buf) + return; + final_picture = &picture_format_temp; + 
avpicture_fill((AVPicture*)final_picture, buf, enc->pix_fmt, enc->width, enc->height); + + /* the source of this conversion is always ost->pict_tmp */ if (img_convert((AVPicture*)final_picture, enc->pix_fmt, + (AVPicture*)&ost->pict_tmp, target_pixfmt, + enc->width, enc->height) < 0) { + + if (verbose >= 0) + fprintf(stderr, "pixel format conversion not handled\n"); + + goto the_end; + } + } + + if (ost->video_pad) { + /* padding_src was set to NULL above when the picture was resampled, otherwise it is the formatted source picture */ img_pad((AVPicture*)final_picture, (AVPicture *)padding_src, + enc->height, enc->width, enc->pix_fmt, + ost->padtop, ost->padbottom, ost->padleft, ost->padright, padcolor); + } + + /* duplicates frame if needed */ + for(i=0;iindex; + + if (s->oformat->flags & AVFMT_RAWPICTURE) { + /* raw pictures are written as AVPicture structure to + avoid any copies. We support temporarily the older + method. */ + AVFrame* old_frame = enc->coded_frame; + enc->coded_frame = dec->coded_frame; //FIXME/XXX remove this hack + pkt.data= (uint8_t *)final_picture; + pkt.size= sizeof(AVPicture); + if(dec->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) + pkt.pts= av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base); + if(dec->coded_frame && dec->coded_frame->key_frame) + pkt.flags |= PKT_FLAG_KEY; + + av_interleaved_write_frame(s, &pkt); + /* undo the temporary coded_frame substitution made above */ enc->coded_frame = old_frame; + } else { + AVFrame big_picture; + + /* encode from a local copy so the per-frame fields set below do not modify final_picture */ big_picture= *final_picture; + /* better than nothing: use input picture interlaced + settings */ + big_picture.interlaced_frame = in_picture->interlaced_frame; + if(avctx_opts->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME)){ + /* top_field_first == -1 means no explicit setting: inherit the field order of the input picture */ if(top_field_first == -1) + big_picture.top_field_first = in_picture->top_field_first; + else + big_picture.top_field_first = top_field_first; + } + + /* handles sameq here. 
This is not correct because it may + not be a global option */ + /* same_quality: reuse the quality stored on the input stream, otherwise the one configured on the output stream */ if (same_quality) { + big_picture.quality = ist->st->quality; + }else + big_picture.quality = ost->st->quality; + /* NOTE(review): pict_type 0 presumably leaves the frame-type decision to the encoder - confirm */ if(!me_threshold) + big_picture.pict_type = 0; +// big_picture.pts = AV_NOPTS_VALUE; + /* the pts handed to the encoder is the running output frame counter */ big_picture.pts= ost->sync_opts; +// big_picture.pts= av_rescale(ost->sync_opts, AV_TIME_BASE*(int64_t)enc->time_base.num, enc->time_base.den); +//av_log(NULL, AV_LOG_DEBUG, "%lld -> encoder\n", ost->sync_opts); + /* encode into the shared bit_buffer; a positive ret is used below as the packet size */ ret = avcodec_encode_video(enc, + bit_buffer, bit_buffer_size, + &big_picture); + //enc->frame_number = enc->real_pict_num; + if(ret>0){ + pkt.data= bit_buffer; + pkt.size= ret; + if(enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) + pkt.pts= av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base); +/*av_log(NULL, AV_LOG_DEBUG, "encoder -> %lld/%lld\n", + pkt.pts != AV_NOPTS_VALUE ? av_rescale(pkt.pts, enc->time_base.den, AV_TIME_BASE*(int64_t)enc->time_base.num) : -1, + pkt.dts != AV_NOPTS_VALUE ? 
av_rescale(pkt.dts, enc->time_base.den, AV_TIME_BASE*(int64_t)enc->time_base.num) : -1);*/ + + if(enc->coded_frame && enc->coded_frame->key_frame) + pkt.flags |= PKT_FLAG_KEY; + av_interleaved_write_frame(s, &pkt); + *frame_size = ret; + //fprintf(stderr,"\nFrame: %3d %3d size: %5d type: %d", + // enc->frame_number-1, enc->real_pict_num, ret, + // enc->pict_type); + /* if two pass, output log */ + if (ost->logfile && enc->stats_out) { + fprintf(ost->logfile, "%s", enc->stats_out); + } + } + } + /* one more output frame has been handled, whether or not the encoder returned data */ ost->sync_opts++; + ost->frame_number++; + } + the_end: + av_free(buf); + av_free(buf1); +} + /* Convert a normalised squared error d into a PSNR figure, computed as -10*log10(d); d == 0 yields INFINITY. */ +static double psnr(double d){ + if(d==0) return INFINITY; + return -10.0*log(d)/log(10.0); +} + /* Append one line of per-frame statistics for video stream ost to a log file. The file, named vstats_HHMMSS.log after the local time of the first call, is opened once and kept in a static FILE pointer; the process exits if it cannot be opened. frame_size is the size in bytes of the frame just encoded. Nothing is written for non-video streams. NOTE(review): enc->coded_frame is dereferenced without a NULL check, and the os parameter and the local ti are not used for any output. */ +static void do_video_stats(AVFormatContext *os, AVOutputStream *ost, + int frame_size) +{ + static FILE *fvstats=NULL; + char filename[40]; + time_t today2; + struct tm *today; + AVCodecContext *enc; + int frame_number; + int64_t ti; + double ti1, bitrate, avg_bitrate; + + if (!fvstats) { + today2 = time(NULL); + today = localtime(&today2); + snprintf(filename, sizeof(filename), "vstats_%02d%02d%02d.log", today->tm_hour, + today->tm_min, + today->tm_sec); + fvstats = fopen(filename,"w"); + if (!fvstats) { + perror("fopen"); + exit(1); + } + } + + ti = MAXINT64; + enc = ost->st->codec; + if (enc->codec_type == CODEC_TYPE_VIDEO) { + frame_number = ost->frame_number; + fprintf(fvstats, "frame= %5d q= %2.1f ", frame_number, enc->coded_frame->quality/(float)FF_QP2LAMBDA); + /* PSNR of plane 0 only, error normalised by width*height*255^2 */ if (enc->flags&CODEC_FLAG_PSNR) + fprintf(fvstats, "PSNR= %6.2f ", psnr(enc->coded_frame->error[0]/(enc->width*enc->height*255.0*255.0))); + + fprintf(fvstats,"f_size= %6d ", frame_size); + /* compute pts value */ + ti1 = ost->sync_opts * av_q2d(enc->time_base); + /* clamp so the average bitrate below is never divided by a (nearly) zero duration */ if (ti1 < 0.01) + ti1 = 0.01; + + /* bitrate: this frame alone over one frame period; avg_bitrate: all video bytes so far over the elapsed output time */ bitrate = (frame_size * 8) / av_q2d(enc->time_base) / 1000.0; + avg_bitrate = (double)(video_size * 8) / ti1 / 1000.0; + fprintf(fvstats, "s_size= %8.0fkB time= %0.3f br= %7.1fkbits/s avg_br= %7.1fkbits/s ", + (double)video_size 
/ 1024, ti1, bitrate, avg_bitrate); + fprintf(fvstats,"type= %c\n", av_get_pict_type_char(enc->coded_frame->pict_type)); + } +} + /* Build and print a one-line progress report on stderr: frame number and quantizer of the first video stream (plus optional QP histogram and PSNR), quantizer of further video streams, output size, time and bitrate. Unless is_last_report is set, the first call only records the time and later calls return early when less than 0.5 s has elapsed since the last report. With is_last_report set, a summary of video, audio and header sizes and the muxing overhead is printed as well. */ +static void print_report(AVFormatContext **output_files, + AVOutputStream **ost_table, int nb_ostreams, + int is_last_report) +{ + char buf[1024]; + AVOutputStream *ost; + AVFormatContext *oc, *os; + int64_t total_size; + AVCodecContext *enc; + int frame_number, vid, i; + double bitrate, ti1, pts; + static int64_t last_time = -1; + static int qp_histogram[52]; + + if (!is_last_report) { + int64_t cur_time; + /* display the report every 0.5 seconds */ + cur_time = av_gettime(); + if (last_time == -1) { + last_time = cur_time; + return; + } + if ((cur_time - last_time) < 500000) + return; + last_time = cur_time; + } + + + /* size and bitrate are measured on the first output file only */ oc = output_files[0]; + + total_size = url_ftell(&oc->pb); + + buf[0] = '\0'; + /* ti1 will hold the smallest positive output timestamp over all streams; start from a large sentinel */ ti1 = 1e10; + vid = 0; + for(i=0;ifile_index]; + enc = ost->st->codec; + /* vid becomes 1 once the first video stream has been reported; later video streams only add their quantizer */ if (vid && enc->codec_type == CODEC_TYPE_VIDEO) { + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "q=%2.1f ", + enc->coded_frame->quality/(float)FF_QP2LAMBDA); + } + if (!vid && enc->codec_type == CODEC_TYPE_VIDEO) { + frame_number = ost->frame_number; + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "frame=%5d q=%3.1f ", + frame_number, enc->coded_frame ? 
enc->coded_frame->quality/(float)FF_QP2LAMBDA : -1); + /* the final report is marked with an L prefix before the remaining fields */ if(is_last_report) + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "L"); + if(qp_hist && enc->coded_frame){ + int j; + int qp= lrintf(enc->coded_frame->quality/(float)FF_QP2LAMBDA); + if(qp>=0 && qpflags&CODEC_FLAG_PSNR){ + int j; + double error, error_sum=0; + double scale, scale_sum=0; + char type[3]= {'Y','U','V'}; + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "PSNR="); + for(j=0; j<3; j++){ + /* last report: error accumulated in the codec context, normalised over all frames; otherwise: error of the current coded frame */ if(is_last_report){ + error= enc->error[j]; + scale= enc->width*enc->height*255.0*255.0*frame_number; + }else{ + error= enc->coded_frame->error[j]; + scale= enc->width*enc->height*255.0*255.0; + } + /* planes 1 and 2 are scaled as a quarter of plane 0 - assumes 4:2:0 chroma subsampling, TODO confirm */ if(j) scale/=4; + error_sum += error; + scale_sum += scale; + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "%c:%2.2f ", type[j], psnr(error/scale)); + } + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), "*:%2.2f ", psnr(error_sum/scale_sum)); + } + vid = 1; + } + /* compute min output value */ + pts = (double)ost->st->pts.val * ost->st->time_base.num / ost->st->time_base.den; + if ((pts < ti1) && (pts > 0)) + ti1 = pts; + } + /* clamp so the bitrate below is never divided by a (nearly) zero duration */ if (ti1 < 0.01) + ti1 = 0.01; + + if (verbose || is_last_report) { + bitrate = (double)(total_size * 8) / ti1 / 1000.0; + + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), + "size=%8.0fkB time=%0.1f bitrate=%6.1fkbits/s", + (double)total_size / 1024, ti1, bitrate); + + if (verbose > 1) + snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), " dup=%d drop=%d", + nb_frames_dup, nb_frames_drop); + + if (verbose >= 0) + fprintf(stderr, "%s \r", buf); + + fflush(stderr); + } + + if (is_last_report && verbose >= 0){ + /* raw: bytes accounted to audio, video and codec extradata; the overhead is what the output file holds beyond that. NOTE(review): raw can be 0, in which case the percentage is the result of a floating-point division by zero */ int64_t raw= audio_size + video_size + extra_size; + fprintf(stderr, "\n"); + fprintf(stderr, "video:%1.0fkB audio:%1.0fkB global headers:%1.0fkB muxing overhead %f%%\n", + video_size/1024.0, + audio_size/1024.0, + extra_size/1024.0, + 100.0*(total_size - raw)/raw + ); + } +} + /* Process one packet read from input stream ist (index ist_index in the input stream table): decode it when the stream needs decoding, then hand the result to every output stream fed by this input, either re-encoding it or copying the packet. Returns 0 on success and -1 when decoding fails. */ +/* pkt = NULL means EOF (needed to flush decoder buffers) */ +static int 
output_packet(AVInputStream *ist, int ist_index, + AVOutputStream **ost_table, int nb_ostreams, + const AVPacket *pkt) +{ + AVFormatContext *os; + AVOutputStream *ost; + uint8_t *ptr; + int len, ret, i; + uint8_t *data_buf; + int data_size, got_picture; + AVFrame picture; + void *buffer_to_free; + static unsigned int samples_size= 0; + static short *samples= NULL; + AVSubtitle subtitle, *subtitle_to_free; + int got_subtitle; + + /* bring ist->pts up to date: the predicted next_pts at EOF, otherwise the packet dts converted to AV_TIME_BASE units when it is known */ if(!pkt){ + ist->pts= ist->next_pts; // needed for last packet if vsync=0 + } else if (pkt->dts != AV_NOPTS_VALUE) { //FIXME seems redundant, as libavformat does this too + ist->next_pts = ist->pts = av_rescale_q(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q); + } else { +// assert(ist->pts == ist->next_pts); + } + + if (pkt == NULL) { + /* EOF handling */ + ptr = NULL; + len = 0; + /* enter the loop body once even though len is 0, so that the decoder is called with an empty buffer */ goto handle_eof; + } + + len = pkt->size; + ptr = pkt->data; + /* a packet may hold several frames: loop until all of its bytes are consumed */ while (len > 0) { + handle_eof: + /* decode the packet if needed */ + data_buf = NULL; /* fail safe */ + data_size = 0; + subtitle_to_free = NULL; + if (ist->decoding_needed) { + switch(ist->st->codec->codec_type) { + case CODEC_TYPE_AUDIO:{ + /* NOTE(review): the av_fast_realloc() result is stored unchecked, and at EOF (pkt == NULL) samples is not resized at all - confirm it cannot still be NULL here */ if(pkt) + samples= av_fast_realloc(samples, &samples_size, FFMAX(pkt->size, AVCODEC_MAX_AUDIO_FRAME_SIZE)); + /* XXX: could avoid copy if PCM 16 bits with same + endianness as CPU */ + ret = avcodec_decode_audio(ist->st->codec, samples, &data_size, + ptr, len); + if (ret < 0) + goto fail_decode; + /* ret is the number of input bytes consumed by the decoder */ ptr += ret; + len -= ret; + /* Some bug in mpeg audio decoder gives */ + /* data_size < 0, it seems they are overflows */ + if (data_size <= 0) { + /* no audio frame */ + continue; + } + data_buf = (uint8_t *)samples; + /* advance the predicted pts by the decoded duration; the /2 presumably turns data_size bytes into 16-bit samples - confirm */ ist->next_pts += ((int64_t)AV_TIME_BASE/2 * data_size) / + (ist->st->codec->sample_rate * ist->st->codec->channels); + break;} + case CODEC_TYPE_VIDEO: + data_size = (ist->st->codec->width * ist->st->codec->height * 3) / 2; + /* XXX: allocate picture correctly */ + avcodec_get_frame_defaults(&picture); + + ret = avcodec_decode_video(ist->st->codec, + 
&picture, &got_picture, ptr, len); + ist->st->quality= picture.quality; + if (ret < 0) + goto fail_decode; + if (!got_picture) { + /* no picture yet */ + goto discard_packet; + } + /* advance the predicted pts by one frame period of the codec time base */ if (ist->st->codec->time_base.num != 0) { + ist->next_pts += ((int64_t)AV_TIME_BASE * + ist->st->codec->time_base.num) / + ist->st->codec->time_base.den; + } + /* the rest of the packet is treated as consumed, so the loop ends after this frame */ len = 0; + break; + case CODEC_TYPE_SUBTITLE: + ret = avcodec_decode_subtitle(ist->st->codec, + &subtitle, &got_subtitle, ptr, len); + if (ret < 0) + goto fail_decode; + if (!got_subtitle) { + goto discard_packet; + } + /* remember the decoded subtitle so its rectangles are released at the end of this iteration */ subtitle_to_free = &subtitle; + len = 0; + break; + default: + goto fail_decode; + } + } else { + /* no decoding: only advance the predicted pts, then pass the packet payload through unchanged */ switch(ist->st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + ist->next_pts += ((int64_t)AV_TIME_BASE * ist->st->codec->frame_size) / + (ist->st->codec->sample_rate * ist->st->codec->channels); + break; + case CODEC_TYPE_VIDEO: + if (ist->st->codec->time_base.num != 0) { + ist->next_pts += ((int64_t)AV_TIME_BASE * + ist->st->codec->time_base.num) / + ist->st->codec->time_base.den; + } + break; + } + data_buf = ptr; + data_size = len; + ret = len; + len = 0; + } + + buffer_to_free = NULL; + /* NOTE(review): this runs for every video stream, including the no-decoding path above where picture has not been filled in - confirm that is intended */ if (ist->st->codec->codec_type == CODEC_TYPE_VIDEO) { + pre_process_video_frame(ist, (AVPicture *)&picture, + &buffer_to_free); + } + + // preprocess audio (volume) + if (ist->st->codec->codec_type == CODEC_TYPE_AUDIO) { + /* audio_volume is a fixed-point gain where 256 means unchanged; +128 rounds before the >> 8 and the result is clamped to the 16-bit range. NOTE(review): the loop always walks the static samples buffer, also when the stream is not decoded - confirm */ if (audio_volume != 256) { + short *volp; + volp = samples; + for(i=0;i<(data_size / sizeof(short));i++) { + int v = ((*volp) * audio_volume + 128) >> 8; + if (v < -32768) v = -32768; + if (v > 32767) v = 32767; + *volp++ = v; + } + } + } + + /* frame rate emulation */ + if (ist->st->codec->rate_emu) { + /* pts: time of frame number ist->frame in microseconds; sleep until the time elapsed since ist->start has caught up with it */ int64_t pts = av_rescale((int64_t) ist->frame * ist->st->codec->time_base.num, 1000000, ist->st->codec->time_base.den); + int64_t now = av_gettime() - ist->start; + if (pts > now) + usleep(pts - now); + + ist->frame++; + } + +#if 0 + /* mpeg PTS deordering : if it is a P or I frame, the PTS + is the one of the 
next displayed one */ + /* XXX: add mpeg4 too ? */ + if (ist->st->codec->codec_id == CODEC_ID_MPEG1VIDEO) { + if (ist->st->codec->pict_type != B_TYPE) { + int64_t tmp; + tmp = ist->last_ip_pts; + ist->last_ip_pts = ist->frac_pts.val; + ist->frac_pts.val = tmp; + } + } +#endif + /* if output time reached then transcode raw format, + encode packets and output them */ + if (start_time == 0 || ist->pts >= start_time) + for(i=0;isource_index == ist_index) { + os = output_files[ost->file_index]; + +#if 0 + printf("%d: got pts=%0.3f %0.3f\n", i, + (double)pkt->pts / AV_TIME_BASE, + ((double)ist->pts / AV_TIME_BASE) - + ((double)ost->st->pts.val * ost->st->time_base.num / ost->st->time_base.den)); +#endif + /* set the input output pts pairs */ + //ost->sync_ipts = (double)(ist->pts + input_files_ts_offset[ist->file_index] - start_time)/ AV_TIME_BASE; + + /* re-encoding path: dispatch the decoded data to the encoder helper matching the output stream type */ if (ost->encoding_needed) { + switch(ost->st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + do_audio_out(os, ost, ist, data_buf, data_size); + break; + case CODEC_TYPE_VIDEO: + do_video_out(os, ost, ist, &picture, &frame_size); + video_size += frame_size; + if (do_vstats && frame_size) + do_video_stats(os, ost, frame_size); + break; + case CODEC_TYPE_SUBTITLE: + do_subtitle_out(os, ost, ist, &subtitle, + pkt->pts); + break; + default: + av_abort(); + } + } else { + /* NOTE(review): pkt is dereferenced throughout this branch (and in the subtitle case above), while this function also accepts pkt == NULL at EOF - confirm these paths cannot be reached while flushing */ AVFrame avframe; //FIXME/XXX remove this + AVPacket opkt; + av_init_packet(&opkt); + + /* no reencoding needed : output the packet directly */ + /* force the input stream PTS */ + + avcodec_get_frame_defaults(&avframe); + /* NOTE(review): coded_frame is pointed at this block-local frame and no reset is visible before the block ends */ ost->st->codec->coded_frame= &avframe; + avframe.key_frame = pkt->flags & PKT_FLAG_KEY; + + /* account the copied bytes in the global size counters */ if(ost->st->codec->codec_type == CODEC_TYPE_AUDIO) + audio_size += data_size; + else if (ost->st->codec->codec_type == CODEC_TYPE_VIDEO) { + video_size += data_size; + ost->sync_opts++; + } + + opkt.stream_index= ost->index; + /* input pts -> AV_TIME_BASE units, plus the per-input-file timestamp offset, -> output stream time base */ if(pkt->pts != AV_NOPTS_VALUE) + opkt.pts= av_rescale_q(av_rescale_q(pkt->pts, ist->st->time_base, AV_TIME_BASE_Q) + 
input_files_ts_offset[ist->file_index], AV_TIME_BASE_Q, ost->st->time_base); + else + opkt.pts= AV_NOPTS_VALUE; + + { + /* dts: fall back to the predicted next_pts when the packet carries none; both are in AV_TIME_BASE units before the final rescale */ int64_t dts; + if (pkt->dts == AV_NOPTS_VALUE) + dts = ist->next_pts; + else + dts= av_rescale_q(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q); + opkt.dts= av_rescale_q(dts + input_files_ts_offset[ist->file_index], AV_TIME_BASE_Q, ost->st->time_base); + } + opkt.flags= pkt->flags; + /* a non-zero return presumably means av_parser_change() produced a newly allocated buffer, hence the destructor so that av_free_packet() releases it - confirm */ if(av_parser_change(ist->st->parser, ost->st->codec, &opkt.data, &opkt.size, data_buf, data_size, pkt->flags & PKT_FLAG_KEY)) + opkt.destruct= av_destruct_packet; + av_interleaved_write_frame(os, &opkt); + ost->st->codec->frame_number++; + ost->frame_number++; + av_free_packet(&opkt); + } + } + } + /* release the temporary picture buffer handed back by pre_process_video_frame(), if any */ av_free(buffer_to_free); + /* XXX: allocate the subtitles in the codec ? */ + if (subtitle_to_free) { + if (subtitle_to_free->rects != NULL) { + for (i = 0; i < subtitle_to_free->num_rects; i++) { + av_free(subtitle_to_free->rects[i].bitmap); + av_free(subtitle_to_free->rects[i].rgba_palette); + } + av_freep(&subtitle_to_free->rects); + } + subtitle_to_free->num_rects = 0; + subtitle_to_free = NULL; + } + } + discard_packet: + if (pkt == NULL) { + /* EOF handling */ + + for(i=0;isource_index == ist_index) { + AVCodecContext *enc= ost->st->codec; + os = output_files[ost->file_index]; + + /* nothing to flush for audio encoders with frame_size <= 1 or for raw-picture video output */ if(ost->st->codec->codec_type == CODEC_TYPE_AUDIO && enc->frame_size <=1) + continue; + if(ost->st->codec->codec_type == CODEC_TYPE_VIDEO && (os->oformat->flags & AVFMT_RAWPICTURE)) + continue; + + if (ost->encoding_needed) { + /* drain the encoder: keep encoding with a NULL input until it returns no more data */ for(;;) { + /* this local pkt shadows the function parameter, which is NULL on this path */ AVPacket pkt; + av_init_packet(&pkt); + pkt.stream_index= ost->index; + + switch(ost->st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + ret = avcodec_encode_audio(enc, bit_buffer, bit_buffer_size, NULL); + /* NOTE(review): ret is added to the size counter before it is checked for <= 0 below */ audio_size += ret; + pkt.flags |= PKT_FLAG_KEY; + break; + case CODEC_TYPE_VIDEO: + ret = avcodec_encode_video(enc, bit_buffer, bit_buffer_size, NULL); + video_size += ret; + if(enc->coded_frame && enc->coded_frame->key_frame) + pkt.flags |= 
PKT_FLAG_KEY; + /* two-pass: append the encoder statistics of the flushed frame to the log file */ if (ost->logfile && enc->stats_out) { + fprintf(ost->logfile, "%s", enc->stats_out); + } + break; + default: + /* other stream types have nothing to flush: force the loop to end */ ret=-1; + } + + /* no more delayed data (or an error): stop draining this encoder */ if(ret<=0) + break; + pkt.data= bit_buffer; + pkt.size= ret; + if(enc->coded_frame && enc->coded_frame->pts != AV_NOPTS_VALUE) + pkt.pts= av_rescale_q(enc->coded_frame->pts, enc->time_base, ost->st->time_base); + av_interleaved_write_frame(os, &pkt); + } + } + } + } + } + + return 0; + fail_decode: + return -1; +} + + +/* + * The following code is the main loop of the file converter + */ +static int av_encode(AVFormatContext **output_files, + int nb_output_files, + AVFormatContext **input_files, + int nb_input_files, + AVStreamMap *stream_maps, int nb_stream_maps) +{ + int ret, i, j, k, n, nb_istreams = 0, nb_ostreams = 0; + AVFormatContext *is, *os; + AVCodecContext *codec, *icodec; + AVOutputStream *ost, **ost_table = NULL; + AVInputStream *ist, **ist_table = NULL; + AVInputFile *file_table; + AVFormatContext *stream_no_data; + int key; + + file_table= (AVInputFile*) av_mallocz(nb_input_files * sizeof(AVInputFile)); + if (!file_table) + goto fail; + + /* input stream init */ + j = 0; + for(i=0;inb_streams; + j += is->nb_streams; + } + nb_istreams = j; + + ist_table = av_mallocz(nb_istreams * sizeof(AVInputStream *)); + if (!ist_table) + goto fail; + + for(i=0;inb_streams;k++) { + ist = ist_table[j++]; + ist->st = is->streams[k]; + ist->file_index = i; + ist->index = k; + ist->discard = 1; /* the stream is discarded by default + (changed later) */ + + if (ist->st->codec->rate_emu) { + ist->start = av_gettime(); + ist->frame = 0; + } + } + } + + /* output stream init */ + nb_ostreams = 0; + for(i=0;inb_streams; + } + if (nb_stream_maps > 0 && nb_stream_maps != nb_ostreams) { + fprintf(stderr, "Number of stream maps must match number of output streams\n"); + exit(1); + } + + /* Sanity check the mapping args -- do the input files & streams exist? 
*/ + for(i=0;i nb_input_files - 1 || + si < 0 || si > file_table[fi].nb_streams - 1) { + fprintf(stderr,"Could not find input stream #%d.%d\n", fi, si); + exit(1); + } + fi = stream_maps[i].sync_file_index; + si = stream_maps[i].sync_stream_index; + if (fi < 0 || fi > nb_input_files - 1 || + si < 0 || si > file_table[fi].nb_streams - 1) { + fprintf(stderr,"Could not find sync stream #%d.%d\n", fi, si); + exit(1); + } + } + + ost_table = av_mallocz(sizeof(AVOutputStream *) * nb_ostreams); + if (!ost_table) + goto fail; + for(i=0;inb_streams;i++) { + int found; + ost = ost_table[n++]; + ost->file_index = k; + ost->index = i; + ost->st = os->streams[i]; + if (nb_stream_maps > 0) { + ost->source_index = file_table[stream_maps[n-1].file_index].ist_index + + stream_maps[n-1].stream_index; + + /* Sanity check that the stream types match */ + if (ist_table[ost->source_index]->st->codec->codec_type != ost->st->codec->codec_type) { + fprintf(stderr, "Codec type mismatch for mapping #%d.%d -> #%d.%d\n", + stream_maps[n-1].file_index, stream_maps[n-1].stream_index, + ost->file_index, ost->index); + exit(1); + } + + } else { + /* get corresponding input stream index : we select the first one with the right type */ + found = 0; + for(j=0;jdiscard && + ist->st->codec->codec_type == ost->st->codec->codec_type) { + ost->source_index = j; + found = 1; + break; + } + } + + if (!found) { + /* try again and reuse existing stream */ + for(j=0;jst->codec->codec_type == ost->st->codec->codec_type) { + ost->source_index = j; + found = 1; + } + } + if (!found) { + fprintf(stderr, "Could not find input stream matching output stream #%d.%d\n", + ost->file_index, ost->index); + exit(1); + } + } + } + ist = ist_table[ost->source_index]; + ist->discard = 0; + ost->sync_ist = (nb_stream_maps > 0) ? 
+ ist_table[file_table[stream_maps[n-1].sync_file_index].ist_index + + stream_maps[n-1].sync_stream_index] : ist; + } + } + + /* for each output stream, we compute the right encoding parameters */ + for(i=0;isource_index]; + + codec = ost->st->codec; + icodec = ist->st->codec; + + if (ost->st->stream_copy) { + /* if stream_copy is selected, no need to decode or encode */ + codec->codec_id = icodec->codec_id; + codec->codec_type = icodec->codec_type; + if(!codec->codec_tag) codec->codec_tag = icodec->codec_tag; + codec->bit_rate = icodec->bit_rate; + codec->extradata= icodec->extradata; + codec->extradata_size= icodec->extradata_size; + codec->time_base = icodec->time_base; + switch(codec->codec_type) { + case CODEC_TYPE_AUDIO: + codec->sample_rate = icodec->sample_rate; + codec->channels = icodec->channels; + codec->frame_size = icodec->frame_size; + codec->block_align= icodec->block_align; + break; + case CODEC_TYPE_VIDEO: + codec->pix_fmt = icodec->pix_fmt; + codec->width = icodec->width; + codec->height = icodec->height; + codec->has_b_frames = icodec->has_b_frames; + break; + case CODEC_TYPE_SUBTITLE: + break; + default: + av_abort(); + } + } else { + switch(codec->codec_type) { + case CODEC_TYPE_AUDIO: + if (fifo_init(&ost->fifo, 2 * MAX_AUDIO_PACKET_SIZE)) + goto fail; + + if (codec->channels == icodec->channels && + codec->sample_rate == icodec->sample_rate) { + ost->audio_resample = 0; + } else { + if (codec->channels != icodec->channels && + (icodec->codec_id == CODEC_ID_AC3 || + icodec->codec_id == CODEC_ID_DTS)) { + /* Special case for 5:1 AC3 and DTS input */ + /* and mono or stereo output */ + /* Request specific number of channels */ + icodec->channels = codec->channels; + if (codec->sample_rate == icodec->sample_rate) + ost->audio_resample = 0; + else { + ost->audio_resample = 1; + } + } else { + ost->audio_resample = 1; + } + } + if(audio_sync_method>1) + ost->audio_resample = 1; + + if(ost->audio_resample){ + ost->resample = 
audio_resample_init(codec->channels, icodec->channels, + codec->sample_rate, icodec->sample_rate); + if(!ost->resample){ + printf("Can't resample. Aborting.\n"); + av_abort(); + } + } + ist->decoding_needed = 1; + ost->encoding_needed = 1; + break; + case CODEC_TYPE_VIDEO: + ost->video_crop = ((frame_leftBand + frame_rightBand + frame_topBand + frame_bottomBand) != 0); + ost->video_pad = ((frame_padleft + frame_padright + frame_padtop + frame_padbottom) != 0); + ost->video_resample = ((codec->width != icodec->width - + (frame_leftBand + frame_rightBand) + + (frame_padleft + frame_padright)) || + (codec->height != icodec->height - + (frame_topBand + frame_bottomBand) + + (frame_padtop + frame_padbottom))); + if (ost->video_crop) { + ost->topBand = frame_topBand; + ost->leftBand = frame_leftBand; + } + if (ost->video_pad) { + ost->padtop = frame_padtop; + ost->padleft = frame_padleft; + ost->padbottom = frame_padbottom; + ost->padright = frame_padright; + if (!ost->video_resample) { + avcodec_get_frame_defaults(&ost->pict_tmp); + if( avpicture_alloc( (AVPicture*)&ost->pict_tmp, codec->pix_fmt, + codec->width, codec->height ) ) + goto fail; + } + } + if (ost->video_resample) { + avcodec_get_frame_defaults(&ost->pict_tmp); + if( avpicture_alloc( (AVPicture*)&ost->pict_tmp, PIX_FMT_YUV420P, + codec->width, codec->height ) ) + goto fail; + + ost->img_resample_ctx = img_resample_init( + codec->width - (frame_padleft + frame_padright), + codec->height - (frame_padtop + frame_padbottom), + icodec->width - (frame_leftBand + frame_rightBand), + icodec->height - (frame_topBand + frame_bottomBand)); + + } + ost->encoding_needed = 1; + ist->decoding_needed = 1; + break; + case CODEC_TYPE_SUBTITLE: + ost->encoding_needed = 1; + ist->decoding_needed = 1; + break; + default: + av_abort(); + break; + } + /* two pass mode */ + if (ost->encoding_needed && + (codec->flags & (CODEC_FLAG_PASS1 | CODEC_FLAG_PASS2))) { + char logfilename[1024]; + FILE *f; + int size; + char *logbuffer; + + 
snprintf(logfilename, sizeof(logfilename), "%s-%d.log", + pass_logfilename ? + pass_logfilename : DEFAULT_PASS_LOGFILENAME, i); + if (codec->flags & CODEC_FLAG_PASS1) { + f = fopen(logfilename, "w"); + if (!f) { + perror(logfilename); + exit(1); + } + ost->logfile = f; + } else { + /* read the log file */ + f = fopen(logfilename, "r"); + if (!f) { + perror(logfilename); + exit(1); + } + fseek(f, 0, SEEK_END); + size = ftell(f); + fseek(f, 0, SEEK_SET); + logbuffer = av_malloc(size + 1); + if (!logbuffer) { + fprintf(stderr, "Could not allocate log buffer\n"); + exit(1); + } + size = fread(logbuffer, 1, size, f); + fclose(f); + logbuffer[size] = '\0'; + codec->stats_in = logbuffer; + } + } + } + if(codec->codec_type == CODEC_TYPE_VIDEO){ + int size= codec->width * codec->height; + bit_buffer_size= FFMAX(bit_buffer_size, 4*size); + } + } + + if (!bit_buffer) + bit_buffer = av_malloc(bit_buffer_size); + if (!bit_buffer) + goto fail; + + /* dump the file output parameters - cannot be done before in case + of stream copy */ + for(i=0;ifilename, 1); + } + + /* dump the stream mapping */ + if (verbose >= 0) { + fprintf(stderr, "Stream mapping:\n"); + for(i=0;i #%d.%d", + ist_table[ost->source_index]->file_index, + ist_table[ost->source_index]->index, + ost->file_index, + ost->index); + if (ost->sync_ist != ist_table[ost->source_index]) + fprintf(stderr, " [sync #%d.%d]", + ost->sync_ist->file_index, + ost->sync_ist->index); + fprintf(stderr, "\n"); + } + } + + /* open each encoder */ + for(i=0;iencoding_needed) { + AVCodec *codec; + codec = avcodec_find_encoder(ost->st->codec->codec_id); + if (!codec) { + fprintf(stderr, "Unsupported codec for output stream #%d.%d\n", + ost->file_index, ost->index); + exit(1); + } + if (avcodec_open(ost->st->codec, codec) < 0) { + fprintf(stderr, "Error while opening codec for output stream #%d.%d - maybe incorrect parameters such as bit_rate, rate, width or height\n", + ost->file_index, ost->index); + exit(1); + } + extra_size += 
ost->st->codec->extradata_size; + } + } + + /* open each decoder */ + for(i=0;idecoding_needed) { + AVCodec *codec; + codec = avcodec_find_decoder(ist->st->codec->codec_id); + if (!codec) { + fprintf(stderr, "Unsupported codec (id=%d) for input stream #%d.%d\n", + ist->st->codec->codec_id, ist->file_index, ist->index); + exit(1); + } + if (avcodec_open(ist->st->codec, codec) < 0) { + fprintf(stderr, "Error while opening codec for input stream #%d.%d\n", + ist->file_index, ist->index); + exit(1); + } + //if (ist->st->codec->codec_type == CODEC_TYPE_VIDEO) + // ist->st->codec->flags |= CODEC_FLAG_REPEAT_FIELD; + } + } + + /* init pts */ + for(i=0;ifile_index]; + ist->pts = 0; + ist->next_pts = av_rescale_q(ist->st->start_time, ist->st->time_base, AV_TIME_BASE_Q); + if(ist->st->start_time == AV_NOPTS_VALUE) + ist->next_pts=0; + if(input_files_ts_offset[ist->file_index]) + ist->next_pts= AV_NOPTS_VALUE; + ist->is_start = 1; + } + + /* compute buffer size max (should use a complete heuristic) */ + for(i=0;i= nb_output_files ) { + fprintf(stderr, "Invalid output file index %d map_meta_data(%d,%d)\n", out_file_index, out_file_index, in_file_index); + ret = -EINVAL; + goto fail; + } + if ( in_file_index < 0 || in_file_index >= nb_input_files ) { + fprintf(stderr, "Invalid input file index %d map_meta_data(%d,%d)\n", in_file_index, out_file_index, in_file_index); + ret = -EINVAL; + goto fail; + } + + out_file = output_files[out_file_index]; + in_file = input_files[in_file_index]; + + strcpy(out_file->title, in_file->title); + strcpy(out_file->author, in_file->author); + strcpy(out_file->copyright, in_file->copyright); + strcpy(out_file->comment, in_file->comment); + strcpy(out_file->album, in_file->album); + out_file->year = in_file->year; + out_file->track = in_file->track; + strcpy(out_file->genre, in_file->genre); + } + + /* open files and write file headers */ + for(i=0;i= 0) { + fprintf(stderr, "Press [q] to stop encoding\n"); + 
url_set_interrupt_cb(decode_interrupt_cb); + } +#endif + term_init(); + + stream_no_data = 0; + key = -1; + + for(; received_sigterm == 0;) { + int file_index, ist_index; + AVPacket pkt; + double ipts_min; + double opts_min; + + redo: + ipts_min= 1e100; + opts_min= 1e100; + /* if 'q' pressed, exits */ + if (!using_stdin) { + if (q_pressed) + break; + /* read_key() returns 0 on EOF */ + key = read_key(); + if (key == 'q') + break; + } + + /* select the stream that we must read now by looking at the + smallest output pts */ + file_index = -1; + for(i=0;ifile_index]; + ist = ist_table[ost->source_index]; + if(ost->st->codec->codec_type == CODEC_TYPE_VIDEO) + opts = ost->sync_opts * av_q2d(ost->st->codec->time_base); + else + opts = ost->st->pts.val * av_q2d(ost->st->time_base); + ipts = (double)ist->pts; + if (!file_table[ist->file_index].eof_reached){ + if(ipts < ipts_min) { + ipts_min = ipts; + if(input_sync ) file_index = ist->file_index; + } + if(opts < opts_min) { + opts_min = opts; + if(!input_sync) file_index = ist->file_index; + } + } + if(ost->frame_number >= max_frames[ost->st->codec->codec_type]){ + file_index= -1; + break; + } + } + /* if none, if is finished */ + if (file_index < 0) { + break; + } + + /* finish if recording time exhausted */ + if (recording_time > 0 && opts_min >= (recording_time / 1000000.0)) + break; + + /* finish if limit size exhausted */ + if (limit_filesize != 0 && (limit_filesize * 1024) < url_ftell(&output_files[0]->pb)) + break; + + /* read a frame from it and output it in the fifo */ + is = input_files[file_index]; + if (av_read_frame(is, &pkt) < 0) { + file_table[file_index].eof_reached = 1; + if (opt_shortest) break; else continue; // + } + + if (!pkt.size) { + stream_no_data = is; + } else { + stream_no_data = 0; + } + if (do_pkt_dump) { + av_pkt_dump(stdout, &pkt, do_hex_dump); + } + /* the following test is needed in case new streams appear + dynamically in stream : we ignore them */ + if (pkt.stream_index >= 
file_table[file_index].nb_streams) + goto discard_packet; + ist_index = file_table[file_index].ist_index + pkt.stream_index; + ist = ist_table[ist_index]; + if (ist->discard) + goto discard_packet; + +// fprintf(stderr, "next:%lld dts:%lld off:%lld %d\n", ist->next_pts, pkt.dts, input_files_ts_offset[ist->file_index], ist->st->codec->codec_type); + if (pkt.dts != AV_NOPTS_VALUE && ist->next_pts != AV_NOPTS_VALUE) { + int64_t delta= av_rescale_q(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q) - ist->next_pts; + if(ABS(delta) > 1LL*dts_delta_threshold*AV_TIME_BASE && !copy_ts){ + input_files_ts_offset[ist->file_index]-= delta; + if (verbose > 2) + fprintf(stderr, "timestamp discontinuity %"PRId64", new offset= %"PRId64"\n", delta, input_files_ts_offset[ist->file_index]); + for(i=0; inext_pts += delta; + ist_table[index]->is_start=1; + } + } + } + + //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->index, pkt.size); + if (output_packet(ist, ist_index, ost_table, nb_ostreams, &pkt) < 0) { + + if (verbose >= 0) + fprintf(stderr, "Error while decoding stream #%d.%d\n", + ist->file_index, ist->index); + + av_free_packet(&pkt); + goto redo; + } + + discard_packet: + av_free_packet(&pkt); + + /* dump report by using the output first video and audio streams */ + print_report(output_files, ost_table, nb_ostreams, 0); + } + + /* at the end of stream, we must flush the decoder buffers */ + for(i=0;idecoding_needed) { + output_packet(ist, i, ost_table, nb_ostreams, NULL); + } + } + + term_exit(); + + /* write the trailer if needed and close file */ + for(i=0;iencoding_needed) { + av_freep(&ost->st->codec->stats_in); + avcodec_close(ost->st->codec); + } + } + + /* close each decoder */ + for(i=0;idecoding_needed) { + avcodec_close(ist->st->codec); + } + } + + /* finished ! 
*/ + + ret = 0; + fail1: + av_freep(&bit_buffer); + av_free(file_table); + + if (ist_table) { + for(i=0;ilogfile) { + fclose(ost->logfile); + ost->logfile = NULL; + } + fifo_free(&ost->fifo); /* works even if fifo is not + initialized but set to zero */ + av_free(ost->pict_tmp.data[0]); + if (ost->video_resample) + img_resample_close(ost->img_resample_ctx); + if (ost->audio_resample) + audio_resample_close(ost->resample); + av_free(ost); + } + } + av_free(ost_table); + } + return ret; + fail: + ret = -ENOMEM; + goto fail1; +} + +#if 0 +int file_read(const char *filename) +{ + URLContext *h; + unsigned char buffer[1024]; + int len, i; + + if (url_open(&h, filename, O_RDONLY) < 0) { + printf("could not open '%s'\n", filename); + return -1; + } + for(;;) { + len = url_read(h, buffer, sizeof(buffer)); + if (len <= 0) + break; + for(i=0;inext) { + if (!strcmp(arg, f->name)) + break; + } + if (!f) { + fprintf(stderr, "Unknown image format: '%s'\n", arg); + exit(1); + } + image_format = f; +} + +static void opt_format(const char *arg) +{ + /* compatibility stuff for pgmyuv */ + if (!strcmp(arg, "pgmyuv")) { + pgmyuv_compatibility_hack=1; +// opt_image_format(arg); + arg = "image2"; + } + + file_iformat = av_find_input_format(arg); + file_oformat = guess_format(arg, NULL, NULL); + if (!file_iformat && !file_oformat) { + fprintf(stderr, "Unknown input or output format: %s\n", arg); + exit(1); + } +} + +static void opt_video_bitrate(const char *arg) +{ + video_bit_rate = atoi(arg) * 1000; +} + +static void opt_video_bitrate_tolerance(const char *arg) +{ + video_bit_rate_tolerance = atoi(arg) * 1000; +} + +static void opt_video_bitrate_max(const char *arg) +{ + video_rc_max_rate = atoi(arg) * 1000; +} + +static void opt_video_bitrate_min(const char *arg) +{ + video_rc_min_rate = atoi(arg) * 1000; +} + +static void opt_video_buffer_size(const char *arg) +{ + video_rc_buffer_size = atoi(arg) * 8*1024; +} + +static void opt_video_rc_eq(char *arg) +{ + video_rc_eq = arg; +} + 
+static void opt_video_rc_override_string(char *arg) +{ + video_rc_override_string = arg; +} + + +static void opt_workaround_bugs(const char *arg) +{ + workaround_bugs = atoi(arg); +} + +static void opt_me_threshold(const char *arg) +{ + me_threshold = atoi(arg); +} + +static void opt_mb_threshold(const char *arg) +{ + mb_threshold = atoi(arg); +} + +static void opt_verbose(const char *arg) +{ + verbose = atoi(arg); + av_log_set_level(atoi(arg)); +} + +static void opt_frame_rate(const char *arg) +{ + if (parse_frame_rate(&frame_rate, &frame_rate_base, arg) < 0) { + fprintf(stderr, "Incorrect frame rate\n"); + exit(1); + } +} + +static void opt_frame_crop_top(const char *arg) +{ + frame_topBand = atoi(arg); + if (frame_topBand < 0) { + fprintf(stderr, "Incorrect top crop size\n"); + exit(1); + } + if ((frame_topBand % 2) != 0) { + fprintf(stderr, "Top crop size must be a multiple of 2\n"); + exit(1); + } + if ((frame_topBand) >= frame_height){ + fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); + exit(1); + } + frame_height -= frame_topBand; +} + +static void opt_frame_crop_bottom(const char *arg) +{ + frame_bottomBand = atoi(arg); + if (frame_bottomBand < 0) { + fprintf(stderr, "Incorrect bottom crop size\n"); + exit(1); + } + if ((frame_bottomBand % 2) != 0) { + fprintf(stderr, "Bottom crop size must be a multiple of 2\n"); + exit(1); + } + if ((frame_bottomBand) >= frame_height){ + fprintf(stderr, "Vertical crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); + exit(1); + } + frame_height -= frame_bottomBand; +} + +static void opt_frame_crop_left(const char *arg) +{ + frame_leftBand = atoi(arg); + if (frame_leftBand < 0) { + fprintf(stderr, "Incorrect left crop size\n"); + exit(1); + } + if ((frame_leftBand % 2) != 0) { + fprintf(stderr, "Left crop size must be a multiple of 2\n"); + exit(1); + } + if ((frame_leftBand) >= 
frame_width){ + fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); + exit(1); + } + frame_width -= frame_leftBand; +} + +static void opt_frame_crop_right(const char *arg) +{ + frame_rightBand = atoi(arg); + if (frame_rightBand < 0) { + fprintf(stderr, "Incorrect right crop size\n"); + exit(1); + } + if ((frame_rightBand % 2) != 0) { + fprintf(stderr, "Right crop size must be a multiple of 2\n"); + exit(1); + } + if ((frame_rightBand) >= frame_width){ + fprintf(stderr, "Horizontal crop dimensions are outside the range of the original image.\nRemember to crop first and scale second.\n"); + exit(1); + } + frame_width -= frame_rightBand; +} + +static void opt_frame_size(const char *arg) +{ + if (parse_image_size(&frame_width, &frame_height, arg) < 0) { + fprintf(stderr, "Incorrect frame size\n"); + exit(1); + } + if ((frame_width % 2) != 0 || (frame_height % 2) != 0) { + fprintf(stderr, "Frame size must be a multiple of 2\n"); + exit(1); + } +} + + +#define SCALEBITS 10 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1<> SCALEBITS) + +#define RGB_TO_U(r1, g1, b1, shift)\ +(((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + \ + FIX(0.50000) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define RGB_TO_V(r1, g1, b1, shift)\ +(((FIX(0.50000) * r1 - FIX(0.41869) * g1 - \ + FIX(0.08131) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +static void opt_pad_color(const char *arg) { + /* Input is expected to be six hex digits similar to + how colors are expressed in html tags (but without the #) */ + int rgb = strtol(arg, NULL, 16); + int r,g,b; + + r = (rgb >> 16); + g = ((rgb >> 8) & 255); + b = (rgb & 255); + + padcolor[0] = RGB_TO_Y(r,g,b); + padcolor[1] = RGB_TO_U(r,g,b,0); + padcolor[2] = RGB_TO_V(r,g,b,0); +} + +static void opt_frame_pad_top(const char *arg) +{ + frame_padtop = atoi(arg); + if (frame_padtop < 0) { + fprintf(stderr, 
"Incorrect top pad size\n"); + exit(1); + } + if ((frame_padtop % 2) != 0) { + fprintf(stderr, "Top pad size must be a multiple of 2\n"); + exit(1); + } +} + +static void opt_frame_pad_bottom(const char *arg) +{ + frame_padbottom = atoi(arg); + if (frame_padbottom < 0) { + fprintf(stderr, "Incorrect bottom pad size\n"); + exit(1); + } + if ((frame_padbottom % 2) != 0) { + fprintf(stderr, "Bottom pad size must be a multiple of 2\n"); + exit(1); + } +} + + +static void opt_frame_pad_left(const char *arg) +{ + frame_padleft = atoi(arg); + if (frame_padleft < 0) { + fprintf(stderr, "Incorrect left pad size\n"); + exit(1); + } + if ((frame_padleft % 2) != 0) { + fprintf(stderr, "Left pad size must be a multiple of 2\n"); + exit(1); + } +} + + +static void opt_frame_pad_right(const char *arg) +{ + frame_padright = atoi(arg); + if (frame_padright < 0) { + fprintf(stderr, "Incorrect right pad size\n"); + exit(1); + } + if ((frame_padright % 2) != 0) { + fprintf(stderr, "Right pad size must be a multiple of 2\n"); + exit(1); + } +} + + +static void opt_frame_pix_fmt(const char *arg) +{ + frame_pix_fmt = avcodec_get_pix_fmt(arg); +} + +static void opt_frame_aspect_ratio(const char *arg) +{ + int x = 0, y = 0; + double ar = 0; + const char *p; + + p = strchr(arg, ':'); + if (p) { + x = strtol(arg, (char **)&arg, 10); + if (arg == p) + y = strtol(arg+1, (char **)&arg, 10); + if (x > 0 && y > 0) + ar = (double)x / (double)y; + } else + ar = strtod(arg, (char **)&arg); + + if (!ar) { + fprintf(stderr, "Incorrect aspect ratio specification.\n"); + exit(1); + } + frame_aspect_ratio = ar; +} + +static void opt_gop_size(const char *arg) +{ + gop_size = atoi(arg); +} + +static void opt_b_frames(const char *arg) +{ + b_frames = atoi(arg); + if (b_frames > FF_MAX_B_FRAMES) { + fprintf(stderr, "\nCannot have more than %d B frames, increase FF_MAX_B_FRAMES.\n", FF_MAX_B_FRAMES); + exit(1); + } else if (b_frames < 1) { + fprintf(stderr, "\nNumber of B frames must be higher than 0\n"); + 
exit(1); + } +} + +static void opt_pre_me(const char *arg) +{ + pre_me = atoi(arg); +} + +static void opt_qscale(const char *arg) +{ + video_qscale = atof(arg); + if (video_qscale < 0.01 || + video_qscale > 255) { + fprintf(stderr, "qscale must be >= 0.01 and <= 255\n"); + exit(1); + } +} + +static void opt_qsquish(const char *arg) +{ + video_qsquish = atof(arg); + if (video_qsquish < 0.0 || + video_qsquish > 99.0) { + fprintf(stderr, "qsquish must be >= 0.0 and <= 99.0\n"); + exit(1); + } +} + +static void opt_lmax(const char *arg) +{ + video_lmax = atof(arg)*FF_QP2LAMBDA; +} + +static void opt_lmin(const char *arg) +{ + video_lmin = atof(arg)*FF_QP2LAMBDA; +} + +static void opt_qmin(const char *arg) +{ + video_qmin = atoi(arg); + if (video_qmin < 1 || + video_qmin > 51) { + fprintf(stderr, "qmin must be >= 1 and <= 51\n"); + exit(1); + } +} + +static void opt_qmax(const char *arg) +{ + video_qmax = atoi(arg); + if (video_qmax < 1 || + video_qmax > 51) { + fprintf(stderr, "qmax must be >= 1 and <= 51\n"); + exit(1); + } +} + +static void opt_mb_lmin(const char *arg) +{ + video_mb_lmin = atof(arg)*FF_QP2LAMBDA; + if (video_mb_lmin < 1 || + video_mb_lmin > FF_LAMBDA_MAX) { + fprintf(stderr, "mblmin must be >= 1 and <= %d\n", FF_LAMBDA_MAX / FF_QP2LAMBDA); + exit(1); + } +} + +static void opt_mb_lmax(const char *arg) +{ + video_mb_lmax = atof(arg)*FF_QP2LAMBDA; + if (video_mb_lmax < 1 || + video_mb_lmax > FF_LAMBDA_MAX) { + fprintf(stderr, "mblmax must be >= 1 and <= %d\n", FF_LAMBDA_MAX / FF_QP2LAMBDA); + exit(1); + } +} + +static void opt_qdiff(const char *arg) +{ + video_qdiff = atoi(arg); + if (video_qdiff < 0 || + video_qdiff > 31) { + fprintf(stderr, "qdiff must be >= 1 and <= 31\n"); + exit(1); + } +} + +static void opt_qblur(const char *arg) +{ + video_qblur = atof(arg); +} + +static void opt_qcomp(const char *arg) +{ + video_qcomp = atof(arg); +} + +static void opt_rc_initial_cplx(const char *arg) +{ + video_rc_initial_cplx = atof(arg); +} +static void 
opt_b_qfactor(const char *arg) +{ + video_b_qfactor = atof(arg); +} +static void opt_i_qfactor(const char *arg) +{ + video_i_qfactor = atof(arg); +} +static void opt_b_qoffset(const char *arg) +{ + video_b_qoffset = atof(arg); +} +static void opt_i_qoffset(const char *arg) +{ + video_i_qoffset = atof(arg); +} + +static void opt_ibias(const char *arg) +{ + video_intra_quant_bias = atoi(arg); +} +static void opt_pbias(const char *arg) +{ + video_inter_quant_bias = atoi(arg); +} + +static void opt_packet_size(const char *arg) +{ + packet_size= atoi(arg); +} + +static void opt_error_rate(const char *arg) +{ + error_rate= atoi(arg); +} + +static void opt_strict(const char *arg) +{ + strict= atoi(arg); +} + +static void opt_top_field_first(const char *arg) +{ + top_field_first= atoi(arg); +} + +static void opt_sc_threshold(const char *arg) +{ + sc_threshold= atoi(arg); +} + +static void opt_me_range(const char *arg) +{ + me_range = atoi(arg); +} + +static void opt_thread_count(const char *arg) +{ + thread_count= atoi(arg); +#if !defined(HAVE_THREADS) + if (verbose >= 0) + fprintf(stderr, "Warning: not compiled with thread support, using thread emulation\n"); +#endif +} + +static void opt_audio_bitrate(const char *arg) +{ + audio_bit_rate = atoi(arg) * 1000; +} + +static void opt_audio_rate(const char *arg) +{ + audio_sample_rate = atoi(arg); +} + +static void opt_audio_channels(const char *arg) +{ + audio_channels = atoi(arg); +} + +static void opt_video_device(const char *arg) +{ + video_device = av_strdup(arg); +} + +static void opt_grab_device(const char *arg) +{ + grab_device = av_strdup(arg); +} + +static void opt_video_channel(const char *arg) +{ + video_channel = strtol(arg, NULL, 0); +} + +static void opt_video_standard(const char *arg) +{ + video_standard = av_strdup(arg); +} + +static void opt_audio_device(const char *arg) +{ + audio_device = av_strdup(arg); +} + +static void opt_codec(int *pstream_copy, int *pcodec_id, + int codec_type, const char *arg) +{ + 
AVCodec *p; + + if (!strcmp(arg, "copy")) { + *pstream_copy = 1; + } else { + p = first_avcodec; + while (p) { + if (!strcmp(p->name, arg) && p->type == codec_type) + break; + p = p->next; + } + if (p == NULL) { + fprintf(stderr, "Unknown codec '%s'\n", arg); + exit(1); + } else { + *pcodec_id = p->id; + } + } +} + +static void opt_audio_codec(const char *arg) +{ + opt_codec(&audio_stream_copy, &audio_codec_id, CODEC_TYPE_AUDIO, arg); +} + +static void opt_audio_tag(const char *arg) +{ + char *tail; + audio_codec_tag= strtol(arg, &tail, 0); + + if(!tail || *tail) + audio_codec_tag= arg[0] + (arg[1]<<8) + (arg[2]<<16) + (arg[3]<<24); +} + +static void opt_video_tag(const char *arg) +{ + char *tail; + video_codec_tag= strtol(arg, &tail, 0); + + if(!tail || *tail) + video_codec_tag= arg[0] + (arg[1]<<8) + (arg[2]<<16) + (arg[3]<<24); +} + +static void add_frame_hooker(const char *arg) +{ + int argc = 0; + char *argv[64]; + int i; + char *args = av_strdup(arg); + + using_vhook = 1; + + argv[0] = strtok(args, " "); + while (argc < 62 && (argv[++argc] = strtok(NULL, " "))) { + } + + i = frame_hook_add(argc, argv); + + if (i != 0) { + fprintf(stderr, "Failed to add video hook function: %s\n", arg); + exit(1); + } +} + +const char *motion_str[] = { + "zero", + "full", + "log", + "phods", + "epzs", + "x1", + "hex", + "umh", + "iter", + NULL, +}; + +static void opt_motion_estimation(const char *arg) +{ + const char **p; + p = motion_str; + for(;;) { + if (!*p) { + fprintf(stderr, "Unknown motion estimation method '%s'\n", arg); + exit(1); + } + if (!strcmp(*p, arg)) + break; + p++; + } + me_method = (p - motion_str) + 1; +} + +static void opt_video_codec(const char *arg) +{ + opt_codec(&video_stream_copy, &video_codec_id, CODEC_TYPE_VIDEO, arg); +} + +static void opt_subtitle_codec(const char *arg) +{ + opt_codec(&subtitle_stream_copy, &subtitle_codec_id, CODEC_TYPE_SUBTITLE, arg); +} + +static void opt_map(const char *arg) +{ + AVStreamMap *m; + const char *p; + + p = arg; 
+ m = &stream_maps[nb_stream_maps++]; + + m->file_index = strtol(arg, (char **)&p, 0); + if (*p) + p++; + + m->stream_index = strtol(p, (char **)&p, 0); + if (*p) { + p++; + m->sync_file_index = strtol(p, (char **)&p, 0); + if (*p) + p++; + m->sync_stream_index = strtol(p, (char **)&p, 0); + } else { + m->sync_file_index = m->file_index; + m->sync_stream_index = m->stream_index; + } +} + +static void opt_map_meta_data(const char *arg) +{ + AVMetaDataMap *m; + const char *p; + + p = arg; + m = &meta_data_maps[nb_meta_data_maps++]; + + m->out_file = strtol(arg, (char **)&p, 0); + if (*p) + p++; + + m->in_file = strtol(p, (char **)&p, 0); +} + +static void opt_recording_time(const char *arg) +{ + recording_time = parse_date(arg, 1); +} + +static void opt_start_time(const char *arg) +{ + start_time = parse_date(arg, 1); +} + +static void opt_rec_timestamp(const char *arg) +{ + rec_timestamp = parse_date(arg, 0) / 1000000; +} + +static void opt_input_ts_offset(const char *arg) +{ + input_ts_offset = parse_date(arg, 1); +} + +static void opt_input_file(const char *filename) +{ + AVFormatContext *ic; + AVFormatParameters params, *ap = ¶ms; + int err, i, ret, rfps, rfps_base; + int64_t timestamp; + + if (!strcmp(filename, "-")) + filename = "pipe:"; + + using_stdin |= !strncmp(filename, "pipe:", 5) || + !strcmp( filename, "/dev/stdin" ); + + /* get default parameters from command line */ + memset(ap, 0, sizeof(*ap)); + ap->sample_rate = audio_sample_rate; + ap->channels = audio_channels; + ap->time_base.den = frame_rate; + ap->time_base.num = frame_rate_base; + ap->width = frame_width + frame_padleft + frame_padright; + ap->height = frame_height + frame_padtop + frame_padbottom; + ap->image_format = image_format; + ap->pix_fmt = frame_pix_fmt; + ap->device = grab_device; + ap->channel = video_channel; + ap->standard = video_standard; + ap->video_codec_id = video_codec_id; + ap->audio_codec_id = audio_codec_id; + if(pgmyuv_compatibility_hack) + ap->video_codec_id= 
CODEC_ID_PGMYUV; + + /* open the input file with generic libav function */ + err = av_open_input_file(&ic, filename, file_iformat, 0, ap); + if (err < 0) { + print_error(filename, err); + exit(1); + } + + if(genpts) + ic->flags|= AVFMT_FLAG_GENPTS; + + /* If not enough info to get the stream parameters, we decode the + first frames to get it. (used in mpeg case for example) */ + ret = av_find_stream_info(ic); + if (ret < 0 && verbose >= 0) { + fprintf(stderr, "%s: could not find codec parameters\n", filename); + exit(1); + } + + timestamp = start_time; + /* add the stream start time */ + if (ic->start_time != AV_NOPTS_VALUE) + timestamp += ic->start_time; + + /* if seeking requested, we execute it */ + if (start_time != 0) { + ret = av_seek_frame(ic, -1, timestamp, AVSEEK_FLAG_BACKWARD); + if (ret < 0) { + fprintf(stderr, "%s: could not seek to position %0.3f\n", + filename, (double)timestamp / AV_TIME_BASE); + } + /* reset seek info */ + start_time = 0; + } + + /* update the current parameters so that they match the one of the input stream */ + for(i=0;inb_streams;i++) { + int j; + AVCodecContext *enc = ic->streams[i]->codec; +#if defined(HAVE_THREADS) + if(thread_count>1) + avcodec_thread_init(enc, thread_count); +#endif + enc->thread_count= thread_count; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + for(j=0; jflags&AV_OPT_FLAG_AUDIO_PARAM) && (opt->flags&AV_OPT_FLAG_DECODING_PARAM)) + av_set_double(enc, opt_names[j], d); + } + //fprintf(stderr, "\nInput Audio channels: %d", enc->channels); + audio_channels = enc->channels; + audio_sample_rate = enc->sample_rate; + if(audio_disable) + ic->streams[i]->discard= AVDISCARD_ALL; + break; + case CODEC_TYPE_VIDEO: + for(j=0; jflags&AV_OPT_FLAG_VIDEO_PARAM) && (opt->flags&AV_OPT_FLAG_DECODING_PARAM)) + av_set_double(enc, opt_names[j], d); + } + frame_height = enc->height; + frame_width = enc->width; + frame_aspect_ratio = av_q2d(enc->sample_aspect_ratio) * enc->width / enc->height; + frame_pix_fmt = 
enc->pix_fmt; + rfps = ic->streams[i]->r_frame_rate.num; + rfps_base = ic->streams[i]->r_frame_rate.den; + enc->workaround_bugs = workaround_bugs; + if(enc->lowres) enc->flags |= CODEC_FLAG_EMU_EDGE; + if(me_threshold) + enc->debug |= FF_DEBUG_MV; + + if (enc->time_base.den != rfps || enc->time_base.num != rfps_base) { + + if (verbose >= 0) + fprintf(stderr,"\nSeems that stream %d comes from film source: %2.2f (%d/%d) -> %2.2f (%d/%d)\n", + i, (float)enc->time_base.den / enc->time_base.num, enc->time_base.den, enc->time_base.num, + + (float)rfps / rfps_base, rfps, rfps_base); + } + /* update the current frame rate to match the stream frame rate */ + frame_rate = rfps; + frame_rate_base = rfps_base; + + enc->rate_emu = rate_emu; + if(video_disable) + ic->streams[i]->discard= AVDISCARD_ALL; + else if(video_discard) + ic->streams[i]->discard= video_discard; + break; + case CODEC_TYPE_DATA: + break; + case CODEC_TYPE_SUBTITLE: + break; + case CODEC_TYPE_UNKNOWN: + break; + default: + av_abort(); + } + } + + input_files[nb_input_files] = ic; + input_files_ts_offset[nb_input_files] = input_ts_offset - (copy_ts ? 
0 : timestamp); + /* dump the file content */ + if (verbose >= 0) + dump_format(ic, nb_input_files, filename, 0); + + nb_input_files++; + file_iformat = NULL; + file_oformat = NULL; + image_format = NULL; + + grab_device = NULL; + video_channel = 0; + + rate_emu = 0; +} + +static void opt_grab(const char *arg) +{ + file_iformat = av_find_input_format(arg); + opt_input_file(""); +} + +static void check_audio_video_inputs(int *has_video_ptr, int *has_audio_ptr) +{ + int has_video, has_audio, i, j; + AVFormatContext *ic; + + has_video = 0; + has_audio = 0; + for(j=0;jnb_streams;i++) { + AVCodecContext *enc = ic->streams[i]->codec; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + has_audio = 1; + break; + case CODEC_TYPE_VIDEO: + has_video = 1; + break; + case CODEC_TYPE_DATA: + case CODEC_TYPE_UNKNOWN: + case CODEC_TYPE_SUBTITLE: + break; + default: + av_abort(); + } + } + } + *has_video_ptr = has_video; + *has_audio_ptr = has_audio; +} + +static void new_video_stream(AVFormatContext *oc) +{ + AVStream *st; + AVCodecContext *video_enc; + int codec_id; + + st = av_new_stream(oc, oc->nb_streams); + if (!st) { + fprintf(stderr, "Could not alloc stream\n"); + exit(1); + } +#if defined(HAVE_THREADS) + if(thread_count>1) + avcodec_thread_init(st->codec, thread_count); +#endif + + video_enc = st->codec; + + if(video_codec_tag) + video_enc->codec_tag= video_codec_tag; + + if( (video_global_header&1) + || (video_global_header==0 && (oc->oformat->flags & AVFMT_GLOBALHEADER))){ + video_enc->flags |= CODEC_FLAG_GLOBAL_HEADER; + avctx_opts->flags|= CODEC_FLAG_GLOBAL_HEADER; + } + if(video_global_header&2){ + video_enc->flags2 |= CODEC_FLAG2_LOCAL_HEADER; + avctx_opts->flags2|= CODEC_FLAG2_LOCAL_HEADER; + } + + if (video_stream_copy) { + st->stream_copy = 1; + video_enc->codec_type = CODEC_TYPE_VIDEO; + } else { + char *p; + int i; + AVCodec *codec; + + codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, CODEC_TYPE_VIDEO); + if (video_codec_id != CODEC_ID_NONE) + 
codec_id = video_codec_id; + + video_enc->codec_id = codec_id; + codec = avcodec_find_encoder(codec_id); + + for(i=0; iflags&AV_OPT_FLAG_VIDEO_PARAM) && (opt->flags&AV_OPT_FLAG_ENCODING_PARAM)) + av_set_double(video_enc, opt_names[i], d); + } + + video_enc->bit_rate = video_bit_rate; + video_enc->bit_rate_tolerance = video_bit_rate_tolerance; + video_enc->time_base.den = frame_rate; + video_enc->time_base.num = frame_rate_base; + if(codec && codec->supported_framerates){ + const AVRational *p= codec->supported_framerates; + AVRational req= (AVRational){frame_rate, frame_rate_base}; + const AVRational *best=NULL; + AVRational best_error= (AVRational){INT_MAX, 1}; + for(; p->den!=0; p++){ + AVRational error= av_sub_q(req, *p); + if(error.num <0) error.num *= -1; + if(av_cmp_q(error, best_error) < 0){ + best_error= error; + best= p; + } + } + video_enc->time_base.den= best->num; + video_enc->time_base.num= best->den; + } + + video_enc->width = frame_width + frame_padright + frame_padleft; + video_enc->height = frame_height + frame_padtop + frame_padbottom; + video_enc->sample_aspect_ratio = av_d2q(frame_aspect_ratio*frame_height/frame_width, 255); + video_enc->pix_fmt = frame_pix_fmt; + + if(codec && codec->pix_fmts){ + const enum PixelFormat *p= codec->pix_fmts; + for(; *p!=-1; p++){ + if(*p == video_enc->pix_fmt) + break; + } + if(*p == -1) + video_enc->pix_fmt = codec->pix_fmts[0]; + } + + if (!intra_only) + video_enc->gop_size = gop_size; + else + video_enc->gop_size = 0; + if (video_qscale || same_quality) { + video_enc->flags |= CODEC_FLAG_QSCALE; + video_enc->global_quality= + st->quality = FF_QP2LAMBDA * video_qscale; + } + + if(intra_matrix) + video_enc->intra_matrix = intra_matrix; + if(inter_matrix) + video_enc->inter_matrix = inter_matrix; + + video_enc->pre_me = pre_me; + + if (b_frames) { + video_enc->max_b_frames = b_frames; + video_enc->b_quant_factor = 2.0; + } + video_enc->qmin = video_qmin; + video_enc->qmax = video_qmax; + video_enc->lmin = 
video_lmin; + video_enc->lmax = video_lmax; + video_enc->rc_qsquish = video_qsquish; + video_enc->mb_lmin = video_mb_lmin; + video_enc->mb_lmax = video_mb_lmax; + video_enc->max_qdiff = video_qdiff; + video_enc->qblur = video_qblur; + video_enc->qcompress = video_qcomp; + video_enc->rc_eq = video_rc_eq; + video_enc->workaround_bugs = workaround_bugs; + video_enc->thread_count = thread_count; + p= video_rc_override_string; + for(i=0; p; i++){ + int start, end, q; + int e=sscanf(p, "%d,%d,%d", &start, &end, &q); + if(e!=3){ + fprintf(stderr, "error parsing rc_override\n"); + exit(1); + } + video_enc->rc_override= + av_realloc(video_enc->rc_override, + sizeof(RcOverride)*(i+1)); + video_enc->rc_override[i].start_frame= start; + video_enc->rc_override[i].end_frame = end; + if(q>0){ + video_enc->rc_override[i].qscale= q; + video_enc->rc_override[i].quality_factor= 1.0; + } + else{ + video_enc->rc_override[i].qscale= 0; + video_enc->rc_override[i].quality_factor= -q/100.0; + } + p= strchr(p, '/'); + if(p) p++; + } + video_enc->rc_override_count=i; + + video_enc->rc_max_rate = video_rc_max_rate; + video_enc->rc_min_rate = video_rc_min_rate; + video_enc->rc_buffer_size = video_rc_buffer_size; + video_enc->rc_initial_buffer_occupancy = video_rc_buffer_size*3/4; + video_enc->rc_buffer_aggressivity= video_rc_buffer_aggressivity; + video_enc->rc_initial_cplx= video_rc_initial_cplx; + video_enc->i_quant_factor = video_i_qfactor; + video_enc->b_quant_factor = video_b_qfactor; + video_enc->i_quant_offset = video_i_qoffset; + video_enc->b_quant_offset = video_b_qoffset; + video_enc->intra_quant_bias = video_intra_quant_bias; + video_enc->inter_quant_bias = video_inter_quant_bias; + video_enc->me_threshold= me_threshold; + video_enc->mb_threshold= mb_threshold; + video_enc->intra_dc_precision= intra_dc_precision - 8; + video_enc->strict_std_compliance = strict; + video_enc->error_rate = error_rate; + video_enc->scenechange_threshold= sc_threshold; + video_enc->me_range = me_range; 
+ video_enc->me_penalty_compensation= me_penalty_compensation; + video_enc->frame_skip_threshold= frame_skip_threshold; + video_enc->frame_skip_factor= frame_skip_factor; + video_enc->frame_skip_exp= frame_skip_exp; + + if(packet_size){ + video_enc->rtp_mode= 1; + video_enc->rtp_payload_size= packet_size; + } + + if (do_psnr) + video_enc->flags|= CODEC_FLAG_PSNR; + + video_enc->me_method = me_method; + + /* two pass mode */ + if (do_pass) { + if (do_pass == 1) { + video_enc->flags |= CODEC_FLAG_PASS1; + } else { + video_enc->flags |= CODEC_FLAG_PASS2; + } + } + } + + /* reset some key parameters */ + video_disable = 0; + video_codec_id = CODEC_ID_NONE; + video_stream_copy = 0; +} + +static void new_audio_stream(AVFormatContext *oc) +{ + AVStream *st; + AVCodecContext *audio_enc; + int codec_id, i; + + st = av_new_stream(oc, oc->nb_streams); + if (!st) { + fprintf(stderr, "Could not alloc stream\n"); + exit(1); + } +#if defined(HAVE_THREADS) + if(thread_count>1) + avcodec_thread_init(st->codec, thread_count); +#endif + + audio_enc = st->codec; + audio_enc->codec_type = CODEC_TYPE_AUDIO; + + if(audio_codec_tag) + audio_enc->codec_tag= audio_codec_tag; + + if (oc->oformat->flags & AVFMT_GLOBALHEADER) { + audio_enc->flags |= CODEC_FLAG_GLOBAL_HEADER; + avctx_opts->flags|= CODEC_FLAG_GLOBAL_HEADER; + } + if (audio_stream_copy) { + st->stream_copy = 1; + audio_enc->channels = audio_channels; + } else { + codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, CODEC_TYPE_AUDIO); + + for(i=0; iflags&AV_OPT_FLAG_AUDIO_PARAM) && (opt->flags&AV_OPT_FLAG_ENCODING_PARAM)) + av_set_double(audio_enc, opt_names[i], d); + } + + if (audio_codec_id != CODEC_ID_NONE) + codec_id = audio_codec_id; + audio_enc->codec_id = codec_id; + + audio_enc->bit_rate = audio_bit_rate; + if (audio_qscale > QSCALE_NONE) { + audio_enc->flags |= CODEC_FLAG_QSCALE; + audio_enc->global_quality = st->quality = FF_QP2LAMBDA * audio_qscale; + } + audio_enc->strict_std_compliance = strict; + 
audio_enc->thread_count = thread_count; + /* For audio codecs other than AC3 or DTS we limit */ + /* the number of coded channels to stereo */ + if (audio_channels > 2 && codec_id != CODEC_ID_AC3 + && codec_id != CODEC_ID_DTS) { + audio_enc->channels = 2; + } else + audio_enc->channels = audio_channels; + } + audio_enc->sample_rate = audio_sample_rate; + audio_enc->time_base= (AVRational){1, audio_sample_rate}; + if (audio_language) { + pstrcpy(st->language, sizeof(st->language), audio_language); + av_free(audio_language); + audio_language = NULL; + } + + /* reset some key parameters */ + audio_disable = 0; + audio_codec_id = CODEC_ID_NONE; + audio_stream_copy = 0; +} + +static void opt_new_subtitle_stream(void) +{ + AVFormatContext *oc; + AVStream *st; + AVCodecContext *subtitle_enc; + int i; + + if (nb_output_files <= 0) { + fprintf(stderr, "At least one output file must be specified\n"); + exit(1); + } + oc = output_files[nb_output_files - 1]; + + st = av_new_stream(oc, oc->nb_streams); + if (!st) { + fprintf(stderr, "Could not alloc stream\n"); + exit(1); + } + + subtitle_enc = st->codec; + subtitle_enc->codec_type = CODEC_TYPE_SUBTITLE; + if (subtitle_stream_copy) { + st->stream_copy = 1; + } else { + for(i=0; iflags&AV_OPT_FLAG_SUBTITLE_PARAM) && (opt->flags&AV_OPT_FLAG_ENCODING_PARAM)) + av_set_double(subtitle_enc, opt_names[i], d); + } + subtitle_enc->codec_id = subtitle_codec_id; + } + + if (subtitle_language) { + pstrcpy(st->language, sizeof(st->language), subtitle_language); + av_free(subtitle_language); + subtitle_language = NULL; + } + + subtitle_codec_id = CODEC_ID_NONE; + subtitle_stream_copy = 0; +} + +static void opt_new_audio_stream(void) +{ + AVFormatContext *oc; + if (nb_output_files <= 0) { + fprintf(stderr, "At least one output file must be specified\n"); + exit(1); + } + oc = output_files[nb_output_files - 1]; + new_audio_stream(oc); +} + +static void opt_new_video_stream(void) +{ + AVFormatContext *oc; + if (nb_output_files <= 0) { + 
fprintf(stderr, "At least one output file must be specified\n"); + exit(1); + } + oc = output_files[nb_output_files - 1]; + new_video_stream(oc); +} + +static void opt_output_file(const char *filename) +{ + AVFormatContext *oc; + int use_video, use_audio, input_has_video, input_has_audio; + AVFormatParameters params, *ap = ¶ms; + + if (!strcmp(filename, "-")) + filename = "pipe:"; + + oc = av_alloc_format_context(); + + if (!file_oformat) { + file_oformat = guess_format(NULL, filename, NULL); + if (!file_oformat) { + fprintf(stderr, "Unable for find a suitable output format for '%s'\n", + filename); + exit(1); + } + } + + oc->oformat = file_oformat; + pstrcpy(oc->filename, sizeof(oc->filename), filename); + + if (!strcmp(file_oformat->name, "ffm") && + strstart(filename, "http:", NULL)) { + /* special case for files sent to ffserver: we get the stream + parameters from ffserver */ + if (read_ffserver_streams(oc, filename) < 0) { + fprintf(stderr, "Could not read stream parameters from '%s'\n", filename); + exit(1); + } + } else { + use_video = file_oformat->video_codec != CODEC_ID_NONE || video_stream_copy || video_codec_id != CODEC_ID_NONE; + use_audio = file_oformat->audio_codec != CODEC_ID_NONE || audio_stream_copy || audio_codec_id != CODEC_ID_NONE; + + /* disable if no corresponding type found and at least one + input file */ + if (nb_input_files > 0) { + check_audio_video_inputs(&input_has_video, &input_has_audio); + if (!input_has_video) + use_video = 0; + if (!input_has_audio) + use_audio = 0; + } + + /* manual disable */ + if (audio_disable) { + use_audio = 0; + } + if (video_disable) { + use_video = 0; + } + + if (use_video) { + new_video_stream(oc); + } + + if (use_audio) { + new_audio_stream(oc); + } + + if (!oc->nb_streams) { + fprintf(stderr, "No audio or video streams available\n"); + exit(1); + } + + oc->timestamp = rec_timestamp; + + if (str_title) + pstrcpy(oc->title, sizeof(oc->title), str_title); + if (str_author) + pstrcpy(oc->author, 
sizeof(oc->author), str_author); + if (str_copyright) + pstrcpy(oc->copyright, sizeof(oc->copyright), str_copyright); + if (str_comment) + pstrcpy(oc->comment, sizeof(oc->comment), str_comment); + } + + output_files[nb_output_files++] = oc; + + /* check filename in case of an image number is expected */ + if (oc->oformat->flags & AVFMT_NEEDNUMBER) { + if (filename_number_test(oc->filename) < 0) { + print_error(oc->filename, AVERROR_NUMEXPECTED); + exit(1); + } + } + + if (!(oc->oformat->flags & AVFMT_NOFILE)) { + /* test if it already exists to avoid loosing precious files */ + if (!file_overwrite && + (strchr(filename, ':') == NULL || + strstart(filename, "file:", NULL))) { + if (url_exist(filename)) { + int c; + + if ( !using_stdin ) { + fprintf(stderr,"File '%s' already exists. Overwrite ? [y/N] ", filename); + fflush(stderr); + c = getchar(); + if (toupper(c) != 'Y') { + fprintf(stderr, "Not overwriting - exiting\n"); + exit(1); + } + } + else { + fprintf(stderr,"File '%s' already exists. 
Exiting.\n", filename); + exit(1); + } + } + } + + /* open the file */ + if (url_fopen(&oc->pb, filename, URL_WRONLY) < 0) { + fprintf(stderr, "Could not open '%s'\n", filename); + exit(1); + } + } + + memset(ap, 0, sizeof(*ap)); + ap->image_format = image_format; + if (av_set_parameters(oc, ap) < 0) { + fprintf(stderr, "%s: Invalid encoding parameters\n", + oc->filename); + exit(1); + } + + oc->packet_size= mux_packet_size; + oc->mux_rate= mux_rate; + oc->preload= (int)(mux_preload*AV_TIME_BASE); + oc->max_delay= (int)(mux_max_delay*AV_TIME_BASE); + oc->loop_output = loop_output; + + /* reset some options */ + file_oformat = NULL; + file_iformat = NULL; + image_format = NULL; +} + +/* prepare dummy protocols for grab */ +static void prepare_grab(void) +{ + int has_video, has_audio, i, j; + AVFormatContext *oc; + AVFormatContext *ic; + AVFormatParameters vp1, *vp = &vp1; + AVFormatParameters ap1, *ap = &ap1; + + /* see if audio/video inputs are needed */ + has_video = 0; + has_audio = 0; + memset(ap, 0, sizeof(*ap)); + memset(vp, 0, sizeof(*vp)); + vp->time_base.num= 1; + for(j=0;jnb_streams;i++) { + AVCodecContext *enc = oc->streams[i]->codec; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + if (enc->sample_rate > ap->sample_rate) + ap->sample_rate = enc->sample_rate; + if (enc->channels > ap->channels) + ap->channels = enc->channels; + has_audio = 1; + break; + case CODEC_TYPE_VIDEO: + if (enc->width > vp->width) + vp->width = enc->width; + if (enc->height > vp->height) + vp->height = enc->height; + + if (vp->time_base.num*(int64_t)enc->time_base.den > enc->time_base.num*(int64_t)vp->time_base.den){ + vp->time_base = enc->time_base; + } + has_video = 1; + break; + default: + av_abort(); + } + } + } + + if (has_video == 0 && has_audio == 0) { + fprintf(stderr, "Output file must have at least one audio or video stream\n"); + exit(1); + } + + if (has_video) { + AVInputFormat *fmt1; + fmt1 = av_find_input_format(video_grab_format); + vp->device = video_device; 
+ vp->channel = video_channel; + vp->standard = video_standard; + vp->pix_fmt = frame_pix_fmt; + if (av_open_input_file(&ic, "", fmt1, 0, vp) < 0) { + fprintf(stderr, "Could not find video grab device\n"); + exit(1); + } + /* If not enough info to get the stream parameters, we decode the + first frames to get it. */ + if ((ic->ctx_flags & AVFMTCTX_NOHEADER) && av_find_stream_info(ic) < 0) { + fprintf(stderr, "Could not find video grab parameters\n"); + exit(1); + } + /* by now video grab has one stream */ + ic->streams[0]->r_frame_rate.num = vp->time_base.den; + ic->streams[0]->r_frame_rate.den = vp->time_base.num; + input_files[nb_input_files] = ic; + + if (verbose >= 0) + dump_format(ic, nb_input_files, "", 0); + + nb_input_files++; + } + if (has_audio && audio_grab_format) { + AVInputFormat *fmt1; + fmt1 = av_find_input_format(audio_grab_format); + ap->device = audio_device; + if (av_open_input_file(&ic, "", fmt1, 0, ap) < 0) { + fprintf(stderr, "Could not find audio grab device\n"); + exit(1); + } + input_files[nb_input_files] = ic; + + if (verbose >= 0) + dump_format(ic, nb_input_files, "", 0); + + nb_input_files++; + } +} + +/* same option as mencoder */ +static void opt_pass(const char *pass_str) +{ + int pass; + pass = atoi(pass_str); + if (pass != 1 && pass != 2) { + fprintf(stderr, "pass number can be only 1 or 2\n"); + exit(1); + } + do_pass = pass; +} + +#if defined(CONFIG_WIN32) || defined(CONFIG_OS2) +static int64_t getutime(void) +{ + return av_gettime(); +} +#else +static int64_t getutime(void) +{ + struct rusage rusage; + + getrusage(RUSAGE_SELF, &rusage); + return (rusage.ru_utime.tv_sec * 1000000LL) + rusage.ru_utime.tv_usec; +} +#endif + +extern int ffm_nopts; + +static void show_formats(void) +{ + AVInputFormat *ifmt; + AVOutputFormat *ofmt; + AVImageFormat *image_fmt; + URLProtocol *up; + AVCodec *p, *p2; + const char **pp, *last_name; + + printf("File formats:\n"); + last_name= "000"; + for(;;){ + int decode=0; + int encode=0; + const char 
*name=NULL; + const char *long_name=NULL; + + for(ofmt = first_oformat; ofmt != NULL; ofmt = ofmt->next) { + if((name == NULL || strcmp(ofmt->name, name)<0) && + strcmp(ofmt->name, last_name)>0){ + name= ofmt->name; + long_name= ofmt->long_name; + encode=1; + } + } + for(ifmt = first_iformat; ifmt != NULL; ifmt = ifmt->next) { + if((name == NULL || strcmp(ifmt->name, name)<0) && + strcmp(ifmt->name, last_name)>0){ + name= ifmt->name; + long_name= ifmt->long_name; + encode=0; + } + if(name && strcmp(ifmt->name, name)==0) + decode=1; + } + if(name==NULL) + break; + last_name= name; + + printf( + " %s%s %-15s %s\n", + decode ? "D":" ", + encode ? "E":" ", + name, + long_name ? long_name:" "); + } + printf("\n"); + + printf("Image formats (filename extensions, if any, follow):\n"); + for(image_fmt = first_image_format; image_fmt != NULL; + image_fmt = image_fmt->next) { + printf( + " %s%s %-6s %s\n", + image_fmt->img_read ? "D":" ", + image_fmt->img_write ? "E":" ", + image_fmt->name, + image_fmt->extensions ? image_fmt->extensions:" "); + } + printf("\n"); + + printf("Codecs:\n"); + last_name= "000"; + for(;;){ + int decode=0; + int encode=0; + int cap=0; + const char *type_str; + + p2=NULL; + for(p = first_avcodec; p != NULL; p = p->next) { + if((p2==NULL || strcmp(p->name, p2->name)<0) && + strcmp(p->name, last_name)>0){ + p2= p; + decode= encode= cap=0; + } + if(p2 && strcmp(p->name, p2->name)==0){ + if(p->decode) decode=1; + if(p->encode) encode=1; + cap |= p->capabilities; + } + } + if(p2==NULL) + break; + last_name= p2->name; + + switch(p2->type) { + case CODEC_TYPE_VIDEO: + type_str = "V"; + break; + case CODEC_TYPE_AUDIO: + type_str = "A"; + break; + case CODEC_TYPE_SUBTITLE: + type_str = "S"; + break; + default: + type_str = "?"; + break; + } + printf( + " %s%s%s%s%s%s %s", + decode ? "D": (/*p2->decoder ? "d":*/" "), + encode ? "E":" ", + type_str, + cap & CODEC_CAP_DRAW_HORIZ_BAND ? "S":" ", + cap & CODEC_CAP_DR1 ? "D":" ", + cap & CODEC_CAP_TRUNCATED ? 
"T":" ", + p2->name); + /* if(p2->decoder && decode==0) + printf(" use %s for decoding", p2->decoder->name);*/ + printf("\n"); + } + printf("\n"); + + printf("Supported file protocols:\n"); + for(up = first_protocol; up != NULL; up = up->next) + printf(" %s:", up->name); + printf("\n"); + + printf("Frame size, frame rate abbreviations:\n ntsc pal qntsc qpal sntsc spal film ntsc-film sqcif qcif cif 4cif\n"); + printf("Motion estimation methods:\n"); + pp = motion_str; + while (*pp) { + printf(" %s", *pp); + if ((pp - motion_str + 1) == ME_ZERO) + printf("(fastest)"); + else if ((pp - motion_str + 1) == ME_FULL) + printf("(slowest)"); + else if ((pp - motion_str + 1) == ME_EPZS) + printf("(default)"); + pp++; + } + printf("\n\n"); + printf( +"Note, the names of encoders and decoders dont always match, so there are\n" +"several cases where the above table shows encoder only or decoder only entries\n" +"even though both encoding and decoding are supported for example, the h263\n" +"decoder corresponds to the h263 and h263p encoders, for file formats its even\n" +"worse\n"); + exit(1); +} + +static void parse_matrix_coeffs(uint16_t *dest, const char *str) +{ + int i; + const char *p = str; + for(i = 0;; i++) { + dest[i] = atoi(p); + if(i == 63) + break; + p = strchr(p, ','); + if(!p) { + fprintf(stderr, "Syntax error in matrix \"%s\" at coeff %d\n", str, i); + exit(1); + } + p++; + } +} + +static void opt_inter_matrix(const char *arg) +{ + inter_matrix = av_mallocz(sizeof(uint16_t) * 64); + parse_matrix_coeffs(inter_matrix, arg); +} + +static void opt_intra_matrix(const char *arg) +{ + intra_matrix = av_mallocz(sizeof(uint16_t) * 64); + parse_matrix_coeffs(intra_matrix, arg); +} + +static void opt_target(const char *arg) +{ + int norm = -1; + static const char *const frame_rates[] = {"25", "30000/1001", "24000/1001"}; + + if(!strncmp(arg, "pal-", 4)) { + norm = 0; + arg += 4; + } else if(!strncmp(arg, "ntsc-", 5)) { + norm = 1; + arg += 5; + } else if(!strncmp(arg, 
"film-", 5)) { + norm = 2; + arg += 5; + } else { + int fr; + /* Calculate FR via float to avoid int overflow */ + fr = (int)(frame_rate * 1000.0 / frame_rate_base); + if(fr == 25000) { + norm = 0; + } else if((fr == 29970) || (fr == 23976)) { + norm = 1; + } else { + /* Try to determine PAL/NTSC by peeking in the input files */ + if(nb_input_files) { + int i, j; + for(j = 0; j < nb_input_files; j++) { + for(i = 0; i < input_files[j]->nb_streams; i++) { + AVCodecContext *c = input_files[j]->streams[i]->codec; + if(c->codec_type != CODEC_TYPE_VIDEO) + continue; + fr = c->time_base.den * 1000 / c->time_base.num; + if(fr == 25000) { + norm = 0; + break; + } else if((fr == 29970) || (fr == 23976)) { + norm = 1; + break; + } + } + if(norm >= 0) + break; + } + } + } + if(verbose && norm >= 0) + fprintf(stderr, "Assuming %s for target.\n", norm ? "NTSC" : "PAL"); + } + + if(norm < 0) { + fprintf(stderr, "Could not determine norm (PAL/NTSC/NTSC-Film) for target.\n"); + fprintf(stderr, "Please prefix target with \"pal-\", \"ntsc-\" or \"film-\",\n"); + fprintf(stderr, "or set a framerate with \"-r xxx\".\n"); + exit(1); + } + + if(!strcmp(arg, "vcd")) { + + opt_video_codec("mpeg1video"); + opt_audio_codec("mp2"); + opt_format("vcd"); + + opt_frame_size(norm ? "352x240" : "352x288"); + opt_frame_rate(frame_rates[norm]); + opt_gop_size(norm ? "18" : "15"); + + video_bit_rate = 1150000; + video_rc_max_rate = 1150000; + video_rc_min_rate = 1150000; + video_rc_buffer_size = 40*1024*8; + + audio_bit_rate = 224000; + audio_sample_rate = 44100; + + mux_packet_size= 2324; + mux_rate= 2352 * 75 * 8; + + /* We have to offset the PTS, so that it is consistent with the SCR. + SCR starts at 36000, but the first two packs contain only padding + and the first pack from the other stream, respectively, may also have + been written before. + So the real data starts at SCR 36000+3*1200. 
*/ + mux_preload= (36000+3*1200) / 90000.0; //0.44 + } else if(!strcmp(arg, "svcd")) { + + opt_video_codec("mpeg2video"); + opt_audio_codec("mp2"); + opt_format("svcd"); + + opt_frame_size(norm ? "480x480" : "480x576"); + opt_frame_rate(frame_rates[norm]); + opt_gop_size(norm ? "18" : "15"); + + video_bit_rate = 2040000; + video_rc_max_rate = 2516000; + video_rc_min_rate = 0; //1145000; + video_rc_buffer_size = 224*1024*8; + opt_default("flags", "+SCAN_OFFSET"); + + + audio_bit_rate = 224000; + audio_sample_rate = 44100; + + mux_packet_size= 2324; + + } else if(!strcmp(arg, "dvd")) { + + opt_video_codec("mpeg2video"); + opt_audio_codec("ac3"); + opt_format("dvd"); + + opt_frame_size(norm ? "720x480" : "720x576"); + opt_frame_rate(frame_rates[norm]); + opt_gop_size(norm ? "18" : "15"); + + video_bit_rate = 6000000; + video_rc_max_rate = 9000000; + video_rc_min_rate = 0; //1500000; + video_rc_buffer_size = 224*1024*8; + + mux_packet_size= 2048; // from www.mpucoder.com: DVD sectors contain 2048 bytes of data, this is also the size of one pack. + mux_rate = 10080000; // from mplex project: data_rate = 1260000. mux_rate = data_rate * 8 + + audio_bit_rate = 448000; + audio_sample_rate = 48000; + + } else if(!strncmp(arg, "dv", 2)) { + + opt_format("dv"); + + opt_frame_size(norm ? "720x480" : "720x576"); + opt_frame_pix_fmt(!strncmp(arg, "dv50", 4) ? "yuv422p" : + (norm ? 
"yuv411p" : "yuv420p")); + opt_frame_rate(frame_rates[norm]); + + audio_sample_rate = 48000; + audio_channels = 2; + + } else { + fprintf(stderr, "Unknown target: %s\n", arg); + exit(1); + } +} + +static void show_version(void) +{ + /* TODO: add function interface to avutil and avformat */ + fprintf(stderr, "ffmpeg " FFMPEG_VERSION "\n" + "libavutil %d\n" + "libavcodec %d\n" + "libavformat %d\n", + LIBAVUTIL_BUILD, avcodec_build(), LIBAVFORMAT_BUILD); + exit(1); +} + +static int opt_default(const char *opt, const char *arg){ + AVOption *o= av_set_string(avctx_opts, opt, arg); + if(!o) + return -1; + +// av_log(NULL, AV_LOG_ERROR, "%s:%s: %f 0x%0X\n", opt, arg, av_get_double(avctx_opts, opt, NULL), (int)av_get_int(avctx_opts, opt, NULL)); + + //FIXME we should always use avctx_opts, ... for storing options so there wont be any need to keep track of whats set over this + opt_names= av_realloc(opt_names, sizeof(void*)*(opt_name_count+1)); + opt_names[opt_name_count++]= o->name; + + /* disable generate of real time pts in ffm (need to be supressed anyway) */ + if(avctx_opts->flags & CODEC_FLAG_BITEXACT) + ffm_nopts = 1; + + if(avctx_opts->debug) + av_log_set_level(AV_LOG_DEBUG); + return 0; +} + +const OptionDef options[] = { + /* main options */ + { "L", 0, {(void*)show_license}, "show license" }, + { "h", 0, {(void*)show_help}, "show help" }, + { "version", 0, {(void*)show_version}, "show version" }, + { "formats", 0, {(void*)show_formats}, "show available formats, codecs, protocols, ..." 
}, + { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" }, + { "img", HAS_ARG, {(void*)opt_image_format}, "force image format", "img_fmt" }, + { "i", HAS_ARG, {(void*)opt_input_file}, "input file name", "filename" }, + { "y", OPT_BOOL, {(void*)&file_overwrite}, "overwrite output files" }, + { "map", HAS_ARG | OPT_EXPERT, {(void*)opt_map}, "set input stream mapping", "file:stream[:syncfile:syncstream]" }, + { "map_meta_data", HAS_ARG | OPT_EXPERT, {(void*)opt_map_meta_data}, "set meta data information of outfile from infile", "outfile:infile" }, + { "t", HAS_ARG, {(void*)opt_recording_time}, "set the recording time", "duration" }, + { "fs", HAS_ARG | OPT_INT, {(void*)&limit_filesize}, "set the limit file size", "limit_size" }, // + { "ss", HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" }, + { "itsoffset", HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" }, + { "title", HAS_ARG | OPT_STRING, {(void*)&str_title}, "set the title", "string" }, + { "timestamp", HAS_ARG, {(void*)&opt_rec_timestamp}, "set the timestamp", "time" }, + { "author", HAS_ARG | OPT_STRING, {(void*)&str_author}, "set the author", "string" }, + { "copyright", HAS_ARG | OPT_STRING, {(void*)&str_copyright}, "set the copyright", "string" }, + { "comment", HAS_ARG | OPT_STRING, {(void*)&str_comment}, "set the comment", "string" }, + { "benchmark", OPT_BOOL | OPT_EXPERT, {(void*)&do_benchmark}, + "add timings for benchmarking" }, + { "dump", OPT_BOOL | OPT_EXPERT, {(void*)&do_pkt_dump}, + "dump each input packet" }, + { "hex", OPT_BOOL | OPT_EXPERT, {(void*)&do_hex_dump}, + "when dumping packets, also dump the payload" }, + { "re", OPT_BOOL | OPT_EXPERT, {(void*)&rate_emu}, "read input at native frame rate", "" }, + { "loop_input", OPT_BOOL | OPT_EXPERT, {(void*)&loop_input}, "loop (current only works with images)" }, + { "loop_output", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&loop_output}, "number of times to loop output in formats that 
support looping (0 loops forever)", "" }, + { "v", HAS_ARG, {(void*)opt_verbose}, "control amount of logging", "verbose" }, + { "target", HAS_ARG, {(void*)opt_target}, "specify target file type (\"vcd\", \"svcd\", \"dvd\", \"dv\", \"dv50\", \"pal-vcd\", \"ntsc-svcd\", ...)", "type" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, + { "vsync", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_sync_method}, "video sync method", "" }, + { "async", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&audio_sync_method}, "audio sync method", "" }, + { "vglobal", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&video_global_header}, "video global header storage type", "" }, + { "copyts", OPT_BOOL | OPT_EXPERT, {(void*)©_ts}, "copy timestamps" }, + { "shortest", OPT_BOOL | OPT_EXPERT, {(void*)&opt_shortest}, "finish encoding within shortest input" }, // + { "dts_delta_threshold", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&dts_delta_threshold}, "timestamp discontinuity delta threshold", "" }, + + /* video options */ + { "b", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate}, "set video bitrate (in kbit/s)", "bitrate" }, + { "vframes", OPT_INT | HAS_ARG | OPT_VIDEO, {(void*)&max_frames[CODEC_TYPE_VIDEO]}, "set the number of video frames to record", "number" }, + { "aframes", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&max_frames[CODEC_TYPE_AUDIO]}, "set the number of audio frames to record", "number" }, + { "dframes", OPT_INT | HAS_ARG, {(void*)&max_frames[CODEC_TYPE_DATA]}, "set the number of data frames to record", "number" }, + { "r", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_rate}, "set frame rate (Hz value, fraction or abbreviation)", "rate" }, + { "s", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_size}, "set frame size (WxH or abbreviation)", "size" }, + { "aspect", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_aspect_ratio}, "set aspect ratio (4:3, 16:9 or 1.3333, 1.7777)", "aspect" }, + { "pix_fmt", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_frame_pix_fmt}, "set 
pixel format", "format" }, + { "croptop", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop_top}, "set top crop band size (in pixels)", "size" }, + { "cropbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop_bottom}, "set bottom crop band size (in pixels)", "size" }, + { "cropleft", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop_left}, "set left crop band size (in pixels)", "size" }, + { "cropright", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_crop_right}, "set right crop band size (in pixels)", "size" }, + { "padtop", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_pad_top}, "set top pad band size (in pixels)", "size" }, + { "padbottom", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_pad_bottom}, "set bottom pad band size (in pixels)", "size" }, + { "padleft", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_pad_left}, "set left pad band size (in pixels)", "size" }, + { "padright", HAS_ARG | OPT_VIDEO, {(void*)opt_frame_pad_right}, "set right pad band size (in pixels)", "size" }, + { "padcolor", HAS_ARG | OPT_VIDEO, {(void*)opt_pad_color}, "set color of pad bands (Hex 000000 thru FFFFFF)", "color" }, + { "g", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_gop_size}, "set the group of picture size", "gop_size" }, + { "intra", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_only}, "use only intra frames"}, + { "vn", OPT_BOOL | OPT_VIDEO, {(void*)&video_disable}, "disable video" }, + { "vdt", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&video_discard}, "discard threshold", "n" }, + { "qscale", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qscale}, "use fixed video quantiser scale (VBR)", "q" }, + { "qmin", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qmin}, "min video quantiser scale (VBR)", "q" }, + { "qmax", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qmax}, "max video quantiser scale (VBR)", "q" }, + { "lmin", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_lmin}, "min video lagrange factor (VBR)", "lambda" }, + { "lmax", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_lmax}, "max video lagrange 
factor (VBR)", "lambda" }, + { "mblmin", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_lmin}, "min macroblock quantiser scale (VBR)", "q" }, + { "mblmax", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_lmax}, "max macroblock quantiser scale (VBR)", "q" }, + { "qdiff", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qdiff}, "max difference between the quantiser scale (VBR)", "q" }, + { "qblur", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qblur}, "video quantiser scale blur (VBR)", "blur" }, + { "qsquish", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qsquish}, "how to keep quantiser between qmin and qmax (0 = clip, 1 = use differentiable function)", "squish" }, + { "qcomp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_qcomp}, "video quantiser scale compression (VBR)", "compression" }, + { "rc_init_cplx", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_rc_initial_cplx}, "initial complexity for 1-pass encoding", "complexity" }, + { "b_qfactor", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_b_qfactor}, "qp factor between p and b frames", "factor" }, + { "i_qfactor", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_i_qfactor}, "qp factor between p and i frames", "factor" }, + { "b_qoffset", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_b_qoffset}, "qp offset between p and b frames", "offset" }, + { "i_qoffset", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_i_qoffset}, "qp offset between p and i frames", "offset" }, + { "ibias", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ibias}, "intra quant bias", "bias" }, + { "pbias", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pbias}, "inter quant bias", "bias" }, + { "rc_eq", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_eq}, "set rate control equation", "equation" }, + { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" }, + { "bt", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance 
(in kbit/s)", "tolerance" }, + { "maxrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_max}, "set max video bitrate tolerance (in kbit/s)", "bitrate" }, + { "minrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_min}, "set min video bitrate tolerance (in kbit/s)", "bitrate" }, + { "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffer size (in kByte)", "size" }, + { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" }, + { "me", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_motion_estimation}, "set motion estimation method", + "method" }, + { "me_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold", "" }, + { "mb_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_threshold}, "macroblock threshold", "" }, + { "bf", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_b_frames}, "use 'frames' B frames", "frames" }, + { "preme", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_pre_me}, "pre motion estimation", "" }, + { "bug", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_workaround_bugs}, "workaround not auto detected encoder bugs", "param" }, + { "ps", HAS_ARG | OPT_EXPERT, {(void*)opt_packet_size}, "set packet size in bits", "size" }, + { "error", HAS_ARG | OPT_EXPERT, {(void*)opt_error_rate}, "error rate", "rate" }, + { "strict", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_strict}, "how strictly to follow the standards", "strictness" }, + { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality}, + "use same video quality as source (implies VBR)" }, + { "pass", HAS_ARG | OPT_VIDEO, {(void*)&opt_pass}, "select the pass number (1 or 2)", "n" }, + { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename}, "select two pass log file name", "file" }, + { "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace}, + "deinterlace pictures" }, + { "psnr", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, 
{(void*)&do_psnr}, "calculate PSNR of compressed frames" }, + { "vstats", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_vstats}, "dump video coding statistics to file" }, + { "vhook", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)add_frame_hooker}, "insert video processing module", "module" }, + { "intra_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_intra_matrix}, "specify intra matrix coeffs", "matrix" }, + { "inter_matrix", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_inter_matrix}, "specify inter matrix coeffs", "matrix" }, + { "top", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_top_field_first}, "top=1/bottom=0/auto=-1 field first", "" }, + { "sc_threshold", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sc_threshold}, "scene change threshold", "threshold" }, + { "me_range", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_range}, "limit motion vectors range (1023 for DivX player)", "range" }, + { "dc", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&intra_dc_precision}, "intra_dc_precision", "precision" }, + { "mepc", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&me_penalty_compensation}, "motion estimation bitrate penalty compensation", "factor (1.0 = 256)" }, + { "vtag", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_tag}, "force video tag/fourcc", "fourcc/tag" }, + { "skip_threshold", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&frame_skip_threshold}, "frame skip threshold", "threshold" }, + { "skip_factor", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&frame_skip_factor}, "frame skip factor", "factor" }, + { "skip_exp", OPT_INT | HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)&frame_skip_exp}, "frame skip exponent", "exponent" }, + { "newvideo", OPT_VIDEO, {(void*)opt_new_video_stream}, "add a new video stream to the current output stream" }, + { "genpts", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, { (void *)&genpts }, "generate pts" }, + { "qphist", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, { (void *)&qp_hist }, "show QP histogram" }, + 
+ /* audio options */ + { "ab", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_bitrate}, "set audio bitrate (in kbit/s)", "bitrate", }, + { "aq", OPT_FLOAT | HAS_ARG | OPT_AUDIO, {(void*)&audio_qscale}, "set audio quality (codec-specific)", "quality", }, + { "ar", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_rate}, "set audio sampling rate (in Hz)", "rate" }, + { "ac", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_channels}, "set number of audio channels", "channels" }, + { "an", OPT_BOOL | OPT_AUDIO, {(void*)&audio_disable}, "disable audio" }, + { "acodec", HAS_ARG | OPT_AUDIO, {(void*)opt_audio_codec}, "force audio codec ('copy' to copy stream)", "codec" }, + { "atag", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_audio_tag}, "force audio tag/fourcc", "fourcc/tag" }, + { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, // + { "newaudio", OPT_AUDIO, {(void*)opt_new_audio_stream}, "add a new audio stream to the current output stream" }, + { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void *)&audio_language}, "set the ISO 639 language code (3 letters) of the current audio stream" , "code" }, + + /* subtitle options */ + { "scodec", HAS_ARG | OPT_SUBTITLE, {(void*)opt_subtitle_codec}, "force subtitle codec ('copy' to copy stream)", "codec" }, + { "newsubtitle", OPT_SUBTITLE, {(void*)opt_new_subtitle_stream}, "add a new subtitle stream to the current output stream" }, + { "slang", HAS_ARG | OPT_STRING | OPT_SUBTITLE, {(void *)&subtitle_language}, "set the ISO 639 language code (3 letters) of the current subtitle stream" , "code" }, + + /* grab options */ + { "vd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_device}, "set video grab device", "device" }, + { "vc", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_channel}, "set video grab channel (DV1394 only)", "channel" }, + { "tvstd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_video_standard}, "set television standard (NTSC, PAL 
(SECAM))", "standard" }, + { "ad", HAS_ARG | OPT_EXPERT | OPT_AUDIO | OPT_GRAB, {(void*)opt_audio_device}, "set audio device", "device" }, + + /* G.2 grab options */ + { "grab", HAS_ARG | OPT_EXPERT | OPT_GRAB, {(void*)opt_grab}, "request grabbing using", "format" }, + { "gd", HAS_ARG | OPT_EXPERT | OPT_VIDEO | OPT_GRAB, {(void*)opt_grab_device}, "set grab device", "device" }, + + /* muxer options */ + { "muxrate", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&mux_rate}, "set mux rate", "rate" }, + { "packetsize", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&mux_packet_size}, "set packet size", "size" }, + { "muxdelay", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_max_delay}, "set the maximum demux-decode delay", "seconds" }, + { "muxpreload", OPT_FLOAT | HAS_ARG | OPT_EXPERT, {(void*)&mux_preload}, "set the initial demux-decode delay", "seconds" }, + { "default", OPT_FUNC2 | HAS_ARG | OPT_AUDIO | OPT_VIDEO | OPT_EXPERT, {(void*)opt_default}, "generic catch all option", "" }, + { NULL, }, +}; + +static void show_banner(void) +{ + fprintf(stderr, "FFmpeg version " FFMPEG_VERSION ", Copyright (c) 2000-2004 Fabrice Bellard\n"); + fprintf(stderr, " configuration: " FFMPEG_CONFIGURATION "\n"); + fprintf(stderr, " libavutil version: " AV_STRINGIFY(LIBAVUTIL_VERSION) "\n"); + fprintf(stderr, " libavcodec version: " AV_STRINGIFY(LIBAVCODEC_VERSION) "\n"); + fprintf(stderr, " libavformat version: " AV_STRINGIFY(LIBAVFORMAT_VERSION) "\n"); + fprintf(stderr, " built on " __DATE__ " " __TIME__); +#ifdef __GNUC__ + fprintf(stderr, ", gcc: " __VERSION__ "\n"); +#else + fprintf(stderr, ", using a non-gcc compiler\n"); +#endif +} + +static void show_license(void) +{ + show_banner(); +#ifdef CONFIG_GPL + printf( + "This program is free software; you can redistribute it and/or modify\n" + "it under the terms of the GNU General Public License as published by\n" + "the Free Software Foundation; either version 2 of the License, or\n" + "(at your option) any later version.\n" + "\n" + "This 
program is distributed in the hope that it will be useful,\n" + "but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" + "GNU General Public License for more details.\n" + "\n" + "You should have received a copy of the GNU General Public License\n" + "along with this program; if not, write to the Free Software\n" + "Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n" + ); +#else + printf( + "This library is free software; you can redistribute it and/or\n" + "modify it under the terms of the GNU Lesser General Public\n" + "License as published by the Free Software Foundation; either\n" + "version 2 of the License, or (at your option) any later version.\n" + "\n" + "This library is distributed in the hope that it will be useful,\n" + "but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" + "Lesser General Public License for more details.\n" + "\n" + "You should have received a copy of the GNU Lesser General Public\n" + "License along with this library; if not, write to the Free Software\n" + "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\n" + ); +#endif + exit(1); +} + +static void show_help(void) +{ + show_banner(); + printf("usage: ffmpeg [[infile options] -i infile]... 
{[outfile options] outfile}...\n" + "Hyper fast Audio and Video encoder\n"); + printf("\n"); + show_help_options(options, "Main options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO, 0); + show_help_options(options, "\nVideo options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO | OPT_GRAB, + OPT_VIDEO); + show_help_options(options, "\nAdvanced Video options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO | OPT_GRAB, + OPT_VIDEO | OPT_EXPERT); + show_help_options(options, "\nAudio options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO | OPT_GRAB, + OPT_AUDIO); + show_help_options(options, "\nAdvanced Audio options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO | OPT_GRAB, + OPT_AUDIO | OPT_EXPERT); + show_help_options(options, "\nSubtitle options:\n", + OPT_SUBTITLE | OPT_GRAB, + OPT_SUBTITLE); + show_help_options(options, "\nAudio/Video grab options:\n", + OPT_GRAB, + OPT_GRAB); + show_help_options(options, "\nAdvanced options:\n", + OPT_EXPERT | OPT_AUDIO | OPT_VIDEO | OPT_GRAB, + OPT_EXPERT); + av_opt_show(avctx_opts, NULL); + + exit(1); +} + +void parse_arg_file(const char *filename) +{ + opt_output_file(filename); +} + +int main(int argc, char **argv) +{ + int i; + int64_t ti; + + av_register_all(); + + avctx_opts= avcodec_alloc_context(); + + if (argc <= 1) + show_help(); + else + show_banner(); + + /* parse options */ + parse_options(argc, argv, options); + + /* file converter / grab */ + if (nb_output_files <= 0) { + fprintf(stderr, "Must supply at least one output file\n"); + exit(1); + } + + if (nb_input_files == 0) { + input_sync = 1; + prepare_grab(); + } + + ti = getutime(); + av_encode(output_files, nb_output_files, input_files, nb_input_files, + stream_maps, nb_stream_maps); + ti = getutime() - ti; + if (do_benchmark) { + printf("bench: utime=%0.3fs\n", ti / 1000000.0); + } + + /* close files */ + for(i=0;ioformat->flags & AVFMT_NOFILE)) + url_fclose(&s->pb); + for(j=0;jnb_streams;j++) + av_free(s->streams[j]); + av_free(s); + } + for(i=0;i +#include + +#ifdef CONFIG_WIN32 
+#undef main /* We don't want SDL to override our main() */ +#endif + +#ifdef CONFIG_OS2 +#define INCL_DOS + #include + #include + + void MorphToPM() + { + PPIB pib; + PTIB tib; + + DosGetInfoBlocks(&tib, &pib); + + // Change flag from VIO to PM: + if (pib->pib_ultype==2) pib->pib_ultype = 3; + } +#endif + +#if defined(__linux__) +#define HAVE_X11 +#endif + +#ifdef HAVE_X11 +#include +#endif + +//#define DEBUG_SYNC + +#define MAX_VIDEOQ_SIZE (5 * 256 * 1024) +#define MAX_AUDIOQ_SIZE (5 * 16 * 1024) +#define MAX_SUBTITLEQ_SIZE (5 * 16 * 1024) + +/* SDL audio buffer size, in samples. Should be small to have precise + A/V sync as SDL does not have hardware buffer fullness info. */ +#define SDL_AUDIO_BUFFER_SIZE 1024 + +/* no AV sync correction is done if below the AV sync threshold */ +#define AV_SYNC_THRESHOLD 0.01 +/* no AV correction is done if too big error */ +#define AV_NOSYNC_THRESHOLD 10.0 + +/* maximum audio speed change to get correct sync */ +#define SAMPLE_CORRECTION_PERCENT_MAX 10 + +/* we use about AUDIO_DIFF_AVG_NB A-V differences to make the average */ +#define AUDIO_DIFF_AVG_NB 20 + +/* NOTE: the size must be big enough to compensate the hardware audio buffersize size */ +#define SAMPLE_ARRAY_SIZE (2*65536) + +typedef struct PacketQueue { + AVPacketList *first_pkt, *last_pkt; + int nb_packets; + int size; + int abort_request; + SDL_mutex *mutex; + SDL_cond *cond; +} PacketQueue; + +#define VIDEO_PICTURE_QUEUE_SIZE 1 +#define SUBPICTURE_QUEUE_SIZE 4 + +typedef struct VideoPicture { + double pts; ///mutex = SDL_CreateMutex(); + q->cond = SDL_CreateCond(); +} + +static void packet_queue_flush(PacketQueue *q) +{ + AVPacketList *pkt, *pkt1; + + SDL_LockMutex(q->mutex); + for(pkt = q->first_pkt; pkt != NULL; pkt = pkt1) { + pkt1 = pkt->next; + av_free_packet(&pkt->pkt); + av_freep(&pkt); + } + q->last_pkt = NULL; + q->first_pkt = NULL; + q->nb_packets = 0; + q->size = 0; + SDL_UnlockMutex(q->mutex); +} + +static void packet_queue_end(PacketQueue *q) +{ + 
packet_queue_flush(q); + SDL_DestroyMutex(q->mutex); + SDL_DestroyCond(q->cond); +} + +static int packet_queue_put(PacketQueue *q, AVPacket *pkt) +{ + AVPacketList *pkt1; + + /* duplicate the packet */ + if (av_dup_packet(pkt) < 0) + return -1; + + pkt1 = av_malloc(sizeof(AVPacketList)); + if (!pkt1) + return -1; + pkt1->pkt = *pkt; + pkt1->next = NULL; + + + SDL_LockMutex(q->mutex); + + if (!q->last_pkt) + + q->first_pkt = pkt1; + else + q->last_pkt->next = pkt1; + q->last_pkt = pkt1; + q->nb_packets++; + q->size += pkt1->pkt.size; + /* XXX: should duplicate packet data in DV case */ + SDL_CondSignal(q->cond); + + SDL_UnlockMutex(q->mutex); + return 0; +} + +static void packet_queue_abort(PacketQueue *q) +{ + SDL_LockMutex(q->mutex); + + q->abort_request = 1; + + SDL_CondSignal(q->cond); + + SDL_UnlockMutex(q->mutex); +} + +/* return < 0 if aborted, 0 if no packet and > 0 if packet. */ +static int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block) +{ + AVPacketList *pkt1; + int ret; + + SDL_LockMutex(q->mutex); + + for(;;) { + if (q->abort_request) { + ret = -1; + break; + } + + pkt1 = q->first_pkt; + if (pkt1) { + q->first_pkt = pkt1->next; + if (!q->first_pkt) + q->last_pkt = NULL; + q->nb_packets--; + q->size -= pkt1->pkt.size; + *pkt = pkt1->pkt; + av_free(pkt1); + ret = 1; + break; + } else if (!block) { + ret = 0; + break; + } else { + SDL_CondWait(q->cond, q->mutex); + } + } + SDL_UnlockMutex(q->mutex); + return ret; +} + +static inline void fill_rectangle(SDL_Surface *screen, + int x, int y, int w, int h, int color) +{ + SDL_Rect rect; + rect.x = x; + rect.y = y; + rect.w = w; + rect.h = h; + SDL_FillRect(screen, &rect, color); +} + +#if 0 +/* draw only the border of a rectangle */ +void fill_border(VideoState *s, int x, int y, int w, int h, int color) +{ + int w1, w2, h1, h2; + + /* fill the background */ + w1 = x; + if (w1 < 0) + w1 = 0; + w2 = s->width - (x + w); + if (w2 < 0) + w2 = 0; + h1 = y; + if (h1 < 0) + h1 = 0; + h2 = s->height - (y + 
h); + if (h2 < 0) + h2 = 0; + fill_rectangle(screen, + s->xleft, s->ytop, + w1, s->height, + color); + fill_rectangle(screen, + s->xleft + s->width - w2, s->ytop, + w2, s->height, + color); + fill_rectangle(screen, + s->xleft + w1, s->ytop, + s->width - w1 - w2, h1, + color); + fill_rectangle(screen, + s->xleft + w1, s->ytop + s->height - h2, + s->width - w1 - w2, h2, + color); +} +#endif + + + +#define SCALEBITS 10 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1<> SCALEBITS) + +#define RGB_TO_U_CCIR(r1, g1, b1, shift)\ +(((- FIX(0.16874*224.0/255.0) * r1 - FIX(0.33126*224.0/255.0) * g1 + \ + FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define RGB_TO_V_CCIR(r1, g1, b1, shift)\ +(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 - \ + FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define ALPHA_BLEND(a, oldp, newp, s)\ +((((oldp << s) * (255 - (a))) + (newp * (a))) / (255 << s)) + +#define RGBA_IN(r, g, b, a, s)\ +{\ + unsigned int v = ((const uint32_t *)(s))[0];\ + a = (v >> 24) & 0xff;\ + r = (v >> 16) & 0xff;\ + g = (v >> 8) & 0xff;\ + b = v & 0xff;\ +} + +#define YUVA_IN(y, u, v, a, s, pal)\ +{\ + unsigned int val = ((const uint32_t *)(pal))[*(const uint8_t*)s];\ + a = (val >> 24) & 0xff;\ + y = (val >> 16) & 0xff;\ + u = (val >> 8) & 0xff;\ + v = val & 0xff;\ +} + +#define YUVA_OUT(d, y, u, v, a)\ +{\ + ((uint32_t *)(d))[0] = (a << 24) | (y << 16) | (u << 8) | v;\ +} + + +#define BPP 1 + +static void blend_subrect(AVPicture *dst, const AVSubtitleRect *rect) +{ + int wrap, wrap3, width2, skip2; + int y, u, v, a, u1, v1, a1, w, h; + uint8_t *lum, *cb, *cr; + const uint8_t *p; + const uint32_t *pal; + + lum = dst->data[0] + rect->y * dst->linesize[0]; + cb = dst->data[1] + (rect->y >> 1) * dst->linesize[1]; + cr = dst->data[2] + (rect->y >> 1) * dst->linesize[2]; + + width2 = (rect->w + 1) >> 1; + skip2 = rect->x >> 1; + wrap = 
dst->linesize[0]; + wrap3 = rect->linesize; + p = rect->bitmap; + pal = rect->rgba_palette; /* Now in YCrCb! */ + + if (rect->y & 1) { + lum += rect->x; + cb += skip2; + cr += skip2; + + if (rect->x & 1) { + YUVA_IN(y, u, v, a, p, pal); + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0); + cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0); + cb++; + cr++; + lum++; + p += BPP; + } + for(w = rect->w - (rect->x & 1); w >= 2; w -= 2) { + YUVA_IN(y, u, v, a, p, pal); + u1 = u; + v1 = v; + a1 = a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + + YUVA_IN(y, u, v, a, p + BPP, pal); + u1 += u; + v1 += v; + a1 += a; + lum[1] = ALPHA_BLEND(a, lum[1], y, 0); + cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1); + cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1); + cb++; + cr++; + p += 2 * BPP; + lum += 2; + } + if (w) { + YUVA_IN(y, u, v, a, p, pal); + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0); + cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0); + } + p += wrap3 + (wrap3 - rect->w * BPP); + lum += wrap + (wrap - rect->w - rect->x); + cb += dst->linesize[1] - width2 - skip2; + cr += dst->linesize[2] - width2 - skip2; + } + for(h = rect->h - (rect->y & 1); h >= 2; h -= 2) { + lum += rect->x; + cb += skip2; + cr += skip2; + + if (rect->x & 1) { + YUVA_IN(y, u, v, a, p, pal); + u1 = u; + v1 = v; + a1 = a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + p += wrap3; + lum += wrap; + YUVA_IN(y, u, v, a, p, pal); + u1 += u; + v1 += v; + a1 += a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1); + cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1); + cb++; + cr++; + p += -wrap3 + BPP; + lum += -wrap + 1; + } + for(w = rect->w - (rect->x & 1); w >= 2; w -= 2) { + YUVA_IN(y, u, v, a, p, pal); + u1 = u; + v1 = v; + a1 = a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + + YUVA_IN(y, u, v, a, p, pal); + u1 += u; + v1 += v; + a1 += a; + lum[1] = ALPHA_BLEND(a, lum[1], y, 0); + p += wrap3; + lum += wrap; + + YUVA_IN(y, u, v, a, p, 
pal); + u1 += u; + v1 += v; + a1 += a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + + YUVA_IN(y, u, v, a, p, pal); + u1 += u; + v1 += v; + a1 += a; + lum[1] = ALPHA_BLEND(a, lum[1], y, 0); + + cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 2); + cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 2); + + cb++; + cr++; + p += -wrap3 + 2 * BPP; + lum += -wrap + 2; + } + if (w) { + YUVA_IN(y, u, v, a, p, pal); + u1 = u; + v1 = v; + a1 = a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + p += wrap3; + lum += wrap; + YUVA_IN(y, u, v, a, p, pal); + u1 += u; + v1 += v; + a1 += a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u1, 1); + cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v1, 1); + cb++; + cr++; + p += -wrap3 + BPP; + lum += -wrap + 1; + } + p += wrap3 + (wrap3 - rect->w * BPP); + lum += wrap + (wrap - rect->w - rect->x); + cb += dst->linesize[1] - width2 - skip2; + cr += dst->linesize[2] - width2 - skip2; + } + /* handle odd height */ + if (h) { + lum += rect->x; + cb += skip2; + cr += skip2; + + if (rect->x & 1) { + YUVA_IN(y, u, v, a, p, pal); + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0); + cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0); + cb++; + cr++; + lum++; + p += BPP; + } + for(w = rect->w - (rect->x & 1); w >= 2; w -= 2) { + YUVA_IN(y, u, v, a, p, pal); + u1 = u; + v1 = v; + a1 = a; + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + + YUVA_IN(y, u, v, a, p + BPP, pal); + u1 += u; + v1 += v; + a1 += a; + lum[1] = ALPHA_BLEND(a, lum[1], y, 0); + cb[0] = ALPHA_BLEND(a1 >> 2, cb[0], u, 1); + cr[0] = ALPHA_BLEND(a1 >> 2, cr[0], v, 1); + cb++; + cr++; + p += 2 * BPP; + lum += 2; + } + if (w) { + YUVA_IN(y, u, v, a, p, pal); + lum[0] = ALPHA_BLEND(a, lum[0], y, 0); + cb[0] = ALPHA_BLEND(a >> 2, cb[0], u, 0); + cr[0] = ALPHA_BLEND(a >> 2, cr[0], v, 0); + } + } +} + +static void free_subpicture(SubPicture *sp) +{ + int i; + + for (i = 0; i < sp->sub.num_rects; i++) + { + av_free(sp->sub.rects[i].bitmap); + 
av_free(sp->sub.rects[i].rgba_palette); + } + + av_free(sp->sub.rects); + + memset(&sp->sub, 0, sizeof(AVSubtitle)); +} + +static void video_image_display(VideoState *is) +{ + VideoPicture *vp; + SubPicture *sp; + AVPicture pict; + float aspect_ratio; + int width, height, x, y; + SDL_Rect rect; + int i; + + vp = &is->pictq[is->pictq_rindex]; + if (vp->bmp) { + /* XXX: use variable in the frame */ + if (is->video_st->codec->sample_aspect_ratio.num == 0) + aspect_ratio = 0; + else + aspect_ratio = av_q2d(is->video_st->codec->sample_aspect_ratio) + * is->video_st->codec->width / is->video_st->codec->height;; + if (aspect_ratio <= 0.0) + aspect_ratio = (float)is->video_st->codec->width / + (float)is->video_st->codec->height; + /* if an active format is indicated, then it overrides the + mpeg format */ +#if 0 + if (is->video_st->codec->dtg_active_format != is->dtg_active_format) { + is->dtg_active_format = is->video_st->codec->dtg_active_format; + printf("dtg_active_format=%d\n", is->dtg_active_format); + } +#endif +#if 0 + switch(is->video_st->codec->dtg_active_format) { + case FF_DTG_AFD_SAME: + default: + /* nothing to do */ + break; + case FF_DTG_AFD_4_3: + aspect_ratio = 4.0 / 3.0; + break; + case FF_DTG_AFD_16_9: + aspect_ratio = 16.0 / 9.0; + break; + case FF_DTG_AFD_14_9: + aspect_ratio = 14.0 / 9.0; + break; + case FF_DTG_AFD_4_3_SP_14_9: + aspect_ratio = 14.0 / 9.0; + break; + case FF_DTG_AFD_16_9_SP_14_9: + aspect_ratio = 14.0 / 9.0; + break; + case FF_DTG_AFD_SP_4_3: + aspect_ratio = 4.0 / 3.0; + break; + } +#endif + + if (is->subtitle_st) + { + if (is->subpq_size > 0) + { + sp = &is->subpq[is->subpq_rindex]; + + if (vp->pts >= sp->pts + ((float) sp->sub.start_display_time / 1000)) + { + SDL_LockYUVOverlay (vp->bmp); + + pict.data[0] = vp->bmp->pixels[0]; + pict.data[1] = vp->bmp->pixels[2]; + pict.data[2] = vp->bmp->pixels[1]; + + pict.linesize[0] = vp->bmp->pitches[0]; + pict.linesize[1] = vp->bmp->pitches[2]; + pict.linesize[2] = vp->bmp->pitches[1]; + + 
for (i = 0; i < sp->sub.num_rects; i++) + blend_subrect(&pict, &sp->sub.rects[i]); + + SDL_UnlockYUVOverlay (vp->bmp); + } + } + } + + + /* XXX: we suppose the screen has a 1.0 pixel ratio */ + height = is->height; + width = ((int)rint(height * aspect_ratio)) & -3; + if (width > is->width) { + width = is->width; + height = ((int)rint(width / aspect_ratio)) & -3; + } + x = (is->width - width) / 2; + y = (is->height - height) / 2; + if (!is->no_background) { + /* fill the background */ + // fill_border(is, x, y, width, height, QERGB(0x00, 0x00, 0x00)); + } else { + is->no_background = 0; + } + rect.x = is->xleft + x; + rect.y = is->xleft + y; + rect.w = width; + rect.h = height; + SDL_DisplayYUVOverlay(vp->bmp, &rect); + } else { +#if 0 + fill_rectangle(screen, + is->xleft, is->ytop, is->width, is->height, + QERGB(0x00, 0x00, 0x00)); +#endif + } +} + +static inline int compute_mod(int a, int b) +{ + a = a % b; + if (a >= 0) + return a; + else + return a + b; +} + +static void video_audio_display(VideoState *s) +{ + int i, i_start, x, y1, y, ys, delay, n, nb_display_channels; + int ch, channels, h, h2, bgcolor, fgcolor; + int16_t time_diff; + + /* compute display index : center on currently output samples */ + channels = s->audio_st->codec->channels; + nb_display_channels = channels; + if (!s->paused) { + n = 2 * channels; + delay = audio_write_get_buf_size(s); + delay /= n; + + /* to be more precise, we take into account the time spent since + the last buffer computation */ + if (audio_callback_time) { + time_diff = av_gettime() - audio_callback_time; + delay += (time_diff * s->audio_st->codec->sample_rate) / 1000000; + } + + delay -= s->width / 2; + if (delay < s->width) + delay = s->width; + i_start = compute_mod(s->sample_array_index - delay * channels, SAMPLE_ARRAY_SIZE); + s->last_i_start = i_start; + } else { + i_start = s->last_i_start; + } + + bgcolor = SDL_MapRGB(screen->format, 0x00, 0x00, 0x00); + fill_rectangle(screen, + s->xleft, s->ytop, s->width, 
s->height, + bgcolor); + + fgcolor = SDL_MapRGB(screen->format, 0xff, 0xff, 0xff); + + /* total height for one channel */ + h = s->height / nb_display_channels; + /* graph height / 2 */ + h2 = (h * 9) / 20; + for(ch = 0;ch < nb_display_channels; ch++) { + i = i_start + ch; + y1 = s->ytop + ch * h + (h / 2); /* position of center line */ + for(x = 0; x < s->width; x++) { + y = (s->sample_array[i] * h2) >> 15; + if (y < 0) { + y = -y; + ys = y1 - y; + } else { + ys = y1; + } + fill_rectangle(screen, + s->xleft + x, ys, 1, y, + fgcolor); + i += channels; + if (i >= SAMPLE_ARRAY_SIZE) + i -= SAMPLE_ARRAY_SIZE; + } + } + + fgcolor = SDL_MapRGB(screen->format, 0x00, 0x00, 0xff); + + for(ch = 1;ch < nb_display_channels; ch++) { + y = s->ytop + ch * h; + fill_rectangle(screen, + s->xleft, y, s->width, 1, + fgcolor); + } + SDL_UpdateRect(screen, s->xleft, s->ytop, s->width, s->height); +} + +/* display the current picture, if any */ +static void video_display(VideoState *is) +{ + if (is->audio_st && is->show_audio) + video_audio_display(is); + else if (is->video_st) + video_image_display(is); +} + +static Uint32 sdl_refresh_timer_cb(Uint32 interval, void *opaque) +{ + SDL_Event event; + event.type = FF_REFRESH_EVENT; + event.user.data1 = opaque; + SDL_PushEvent(&event); + return 0; /* 0 means stop timer */ +} + +/* schedule a video refresh in 'delay' ms */ +static void schedule_refresh(VideoState *is, int delay) +{ + SDL_AddTimer(delay, sdl_refresh_timer_cb, is); +} + +/* get the current audio clock value */ +static double get_audio_clock(VideoState *is) +{ + double pts; + int hw_buf_size, bytes_per_sec; + pts = is->audio_clock; + hw_buf_size = audio_write_get_buf_size(is); + bytes_per_sec = 0; + if (is->audio_st) { + bytes_per_sec = is->audio_st->codec->sample_rate * + 2 * is->audio_st->codec->channels; + } + if (bytes_per_sec) + pts -= (double)hw_buf_size / bytes_per_sec; + return pts; +} + +/* get the current video clock value */ +static double get_video_clock(VideoState 
*is) +{ + double delta; + if (is->paused) { + delta = 0; + } else { + delta = (av_gettime() - is->video_current_pts_time) / 1000000.0; + } + return is->video_current_pts + delta; +} + +/* get the current external clock value */ +static double get_external_clock(VideoState *is) +{ + int64_t ti; + ti = av_gettime(); + return is->external_clock + ((ti - is->external_clock_time) * 1e-6); +} + +/* get the current master clock value */ +static double get_master_clock(VideoState *is) +{ + double val; + + if (is->av_sync_type == AV_SYNC_VIDEO_MASTER) { + if (is->video_st) + val = get_video_clock(is); + else + val = get_audio_clock(is); + } else if (is->av_sync_type == AV_SYNC_AUDIO_MASTER) { + if (is->audio_st) + val = get_audio_clock(is); + else + val = get_video_clock(is); + } else { + val = get_external_clock(is); + } + return val; +} + +/* seek in the stream */ +static void stream_seek(VideoState *is, int64_t pos, int rel) +{ + if (!is->seek_req) { + is->seek_pos = pos; + is->seek_flags = rel < 0 ? 
AVSEEK_FLAG_BACKWARD : 0; + is->seek_req = 1; + } +} + +/* pause or resume the video */ +static void stream_pause(VideoState *is) +{ + is->paused = !is->paused; + if (is->paused) { + is->video_current_pts = get_video_clock(is); + } +} + +/* called to display each frame */ +static void video_refresh_timer(void *opaque) +{ + VideoState *is = opaque; + VideoPicture *vp; + double actual_delay, delay, sync_threshold, ref_clock, diff; + + SubPicture *sp, *sp2; + + if (is->video_st) { + if (is->pictq_size == 0) { + /* if no picture, need to wait */ + schedule_refresh(is, 1); + } else { + /* dequeue the picture */ + vp = &is->pictq[is->pictq_rindex]; + + /* update current video pts */ + is->video_current_pts = vp->pts; + is->video_current_pts_time = av_gettime(); + + /* compute nominal delay */ + delay = vp->pts - is->frame_last_pts; + if (delay <= 0 || delay >= 1.0) { + /* if incorrect delay, use previous one */ + delay = is->frame_last_delay; + } + is->frame_last_delay = delay; + is->frame_last_pts = vp->pts; + + /* update delay to follow master synchronisation source */ + if (((is->av_sync_type == AV_SYNC_AUDIO_MASTER && is->audio_st) || + is->av_sync_type == AV_SYNC_EXTERNAL_CLOCK)) { + /* if video is slave, we try to correct big delays by + duplicating or deleting a frame */ + ref_clock = get_master_clock(is); + diff = vp->pts - ref_clock; + + /* skip or repeat frame. We take into account the + delay to compute the threshold. 
I still don't know + if it is the best guess */ + sync_threshold = AV_SYNC_THRESHOLD; + if (delay > sync_threshold) + sync_threshold = delay; + if (fabs(diff) < AV_NOSYNC_THRESHOLD) { + if (diff <= -sync_threshold) + delay = 0; + else if (diff >= sync_threshold) + delay = 2 * delay; + } + } + + is->frame_timer += delay; + /* compute the REAL delay (we need to do that to avoid + long term errors */ + actual_delay = is->frame_timer - (av_gettime() / 1000000.0); + if (actual_delay < 0.010) { + /* XXX: should skip picture */ + actual_delay = 0.010; + } + /* launch timer for next picture */ + schedule_refresh(is, (int)(actual_delay * 1000 + 0.5)); + +#if defined(DEBUG_SYNC) + printf("video: delay=%0.3f actual_delay=%0.3f pts=%0.3f A-V=%f\n", + delay, actual_delay, vp->pts, -diff); +#endif + + if(is->subtitle_st) { + if (is->subtitle_stream_changed) { + SDL_LockMutex(is->subpq_mutex); + + while (is->subpq_size) { + free_subpicture(&is->subpq[is->subpq_rindex]); + + /* update queue size and signal for next picture */ + if (++is->subpq_rindex == SUBPICTURE_QUEUE_SIZE) + is->subpq_rindex = 0; + + is->subpq_size--; + } + is->subtitle_stream_changed = 0; + + SDL_CondSignal(is->subpq_cond); + SDL_UnlockMutex(is->subpq_mutex); + } else { + if (is->subpq_size > 0) { + sp = &is->subpq[is->subpq_rindex]; + + if (is->subpq_size > 1) + sp2 = &is->subpq[(is->subpq_rindex + 1) % SUBPICTURE_QUEUE_SIZE]; + else + sp2 = NULL; + + if ((is->video_current_pts > (sp->pts + ((float) sp->sub.end_display_time / 1000))) + || (sp2 && is->video_current_pts > (sp2->pts + ((float) sp2->sub.start_display_time / 1000)))) + { + free_subpicture(sp); + + /* update queue size and signal for next picture */ + if (++is->subpq_rindex == SUBPICTURE_QUEUE_SIZE) + is->subpq_rindex = 0; + + SDL_LockMutex(is->subpq_mutex); + is->subpq_size--; + SDL_CondSignal(is->subpq_cond); + SDL_UnlockMutex(is->subpq_mutex); + } + } + } + } + + /* display picture */ + video_display(is); + + /* update queue size and signal for 
next picture */ + if (++is->pictq_rindex == VIDEO_PICTURE_QUEUE_SIZE) + is->pictq_rindex = 0; + + SDL_LockMutex(is->pictq_mutex); + is->pictq_size--; + SDL_CondSignal(is->pictq_cond); + SDL_UnlockMutex(is->pictq_mutex); + } + } else if (is->audio_st) { + /* draw the next audio frame */ + + schedule_refresh(is, 40); + + /* if only audio stream, then display the audio bars (better + than nothing, just to test the implementation */ + + /* display picture */ + video_display(is); + } else { + schedule_refresh(is, 100); + } + if (show_status) { + static int64_t last_time; + int64_t cur_time; + int aqsize, vqsize, sqsize; + double av_diff; + + cur_time = av_gettime(); + if (!last_time || (cur_time - last_time) >= 500 * 1000) { + aqsize = 0; + vqsize = 0; + sqsize = 0; + if (is->audio_st) + aqsize = is->audioq.size; + if (is->video_st) + vqsize = is->videoq.size; + if (is->subtitle_st) + sqsize = is->subtitleq.size; + av_diff = 0; + if (is->audio_st && is->video_st) + av_diff = get_audio_clock(is) - get_video_clock(is); + printf("%7.2f A-V:%7.3f aq=%5dKB vq=%5dKB sq=%5dB \r", + get_master_clock(is), av_diff, aqsize / 1024, vqsize / 1024, sqsize); + fflush(stdout); + last_time = cur_time; + } + } +} + +/* allocate a picture (needs to do that in main thread to avoid + potential locking problems */ +static void alloc_picture(void *opaque) +{ + VideoState *is = opaque; + VideoPicture *vp; + + vp = &is->pictq[is->pictq_windex]; + + if (vp->bmp) + SDL_FreeYUVOverlay(vp->bmp); + +#if 0 + /* XXX: use generic function */ + /* XXX: disable overlay if no hardware acceleration or if RGB format */ + switch(is->video_st->codec->pix_fmt) { + case PIX_FMT_YUV420P: + case PIX_FMT_YUV422P: + case PIX_FMT_YUV444P: + case PIX_FMT_YUV422: + case PIX_FMT_YUV410P: + case PIX_FMT_YUV411P: + is_yuv = 1; + break; + default: + is_yuv = 0; + break; + } +#endif + vp->bmp = SDL_CreateYUVOverlay(is->video_st->codec->width, + is->video_st->codec->height, + SDL_YV12_OVERLAY, + screen); + vp->width = 
is->video_st->codec->width; + vp->height = is->video_st->codec->height; + + SDL_LockMutex(is->pictq_mutex); + vp->allocated = 1; + SDL_CondSignal(is->pictq_cond); + SDL_UnlockMutex(is->pictq_mutex); +} + +/** + * + * @param pts the dts of the pkt / pts of the frame and guessed if not known + */ +static int queue_picture(VideoState *is, AVFrame *src_frame, double pts) +{ + VideoPicture *vp; + int dst_pix_fmt; + AVPicture pict; + + /* wait until we have space to put a new picture */ + SDL_LockMutex(is->pictq_mutex); + while (is->pictq_size >= VIDEO_PICTURE_QUEUE_SIZE && + !is->videoq.abort_request) { + SDL_CondWait(is->pictq_cond, is->pictq_mutex); + } + SDL_UnlockMutex(is->pictq_mutex); + + if (is->videoq.abort_request) + return -1; + + vp = &is->pictq[is->pictq_windex]; + + /* alloc or resize hardware picture buffer */ + if (!vp->bmp || + vp->width != is->video_st->codec->width || + vp->height != is->video_st->codec->height) { + SDL_Event event; + + vp->allocated = 0; + + /* the allocation must be done in the main thread to avoid + locking problems */ + event.type = FF_ALLOC_EVENT; + event.user.data1 = is; + SDL_PushEvent(&event); + + /* wait until the picture is allocated */ + SDL_LockMutex(is->pictq_mutex); + while (!vp->allocated && !is->videoq.abort_request) { + SDL_CondWait(is->pictq_cond, is->pictq_mutex); + } + SDL_UnlockMutex(is->pictq_mutex); + + if (is->videoq.abort_request) + return -1; + } + + /* if the frame is not skipped, then display it */ + if (vp->bmp) { + /* get a pointer on the bitmap */ + SDL_LockYUVOverlay (vp->bmp); + + dst_pix_fmt = PIX_FMT_YUV420P; + pict.data[0] = vp->bmp->pixels[0]; + pict.data[1] = vp->bmp->pixels[2]; + pict.data[2] = vp->bmp->pixels[1]; + + pict.linesize[0] = vp->bmp->pitches[0]; + pict.linesize[1] = vp->bmp->pitches[2]; + pict.linesize[2] = vp->bmp->pitches[1]; + img_convert(&pict, dst_pix_fmt, + (AVPicture *)src_frame, is->video_st->codec->pix_fmt, + is->video_st->codec->width, is->video_st->codec->height); + /* 
update the bitmap content */ + SDL_UnlockYUVOverlay(vp->bmp); + + vp->pts = pts; + + /* now we can update the picture count */ + if (++is->pictq_windex == VIDEO_PICTURE_QUEUE_SIZE) + is->pictq_windex = 0; + SDL_LockMutex(is->pictq_mutex); + is->pictq_size++; + SDL_UnlockMutex(is->pictq_mutex); + } + return 0; +} + +/** + * compute the exact PTS for the picture if it is omitted in the stream + * @param pts1 the dts of the pkt / pts of the frame + */ +static int output_picture2(VideoState *is, AVFrame *src_frame, double pts1) +{ + double frame_delay, pts; + + pts = pts1; + + if (pts != 0) { + /* update video clock with pts, if present */ + is->video_clock = pts; + } else { + pts = is->video_clock; + } + /* update video clock for next frame */ + frame_delay = av_q2d(is->video_st->codec->time_base); + /* for MPEG2, the frame can be repeated, so we update the + clock accordingly */ + frame_delay += src_frame->repeat_pict * (frame_delay * 0.5); + is->video_clock += frame_delay; + +#if defined(DEBUG_SYNC) && 0 + { + int ftype; + if (src_frame->pict_type == FF_B_TYPE) + ftype = 'B'; + else if (src_frame->pict_type == FF_I_TYPE) + ftype = 'I'; + else + ftype = 'P'; + printf("frame_type=%c clock=%0.3f pts=%0.3f\n", + ftype, pts, pts1); + } +#endif + return queue_picture(is, src_frame, pts); +} + +static int video_thread(void *arg) +{ + VideoState *is = arg; + AVPacket pkt1, *pkt = &pkt1; + int len1, got_picture; + AVFrame *frame= avcodec_alloc_frame(); + double pts; + + for(;;) { + while (is->paused && !is->videoq.abort_request) { + SDL_Delay(10); + } + if (packet_queue_get(&is->videoq, pkt, 1) < 0) + break; + /* NOTE: ipts is the PTS of the _first_ picture beginning in + this packet, if any */ + pts = 0; + if (pkt->dts != AV_NOPTS_VALUE) + pts = av_q2d(is->video_st->time_base)*pkt->dts; + + SDL_LockMutex(is->video_decoder_mutex); + len1 = avcodec_decode_video(is->video_st->codec, + frame, &got_picture, + pkt->data, pkt->size); + SDL_UnlockMutex(is->video_decoder_mutex); +// 
if (len1 < 0) +// break; + if (got_picture) { + if (output_picture2(is, frame, pts) < 0) + goto the_end; + } + av_free_packet(pkt); + if (step) + if (cur_stream) + stream_pause(cur_stream); + } + the_end: + av_free(frame); + return 0; +} + +static int subtitle_thread(void *arg) +{ + VideoState *is = arg; + SubPicture *sp; + AVPacket pkt1, *pkt = &pkt1; + int len1, got_subtitle; + double pts; + int i, j; + int r, g, b, y, u, v, a; + + for(;;) { + while (is->paused && !is->subtitleq.abort_request) { + SDL_Delay(10); + } + if (packet_queue_get(&is->subtitleq, pkt, 1) < 0) + break; + + SDL_LockMutex(is->subpq_mutex); + while (is->subpq_size >= SUBPICTURE_QUEUE_SIZE && + !is->subtitleq.abort_request) { + SDL_CondWait(is->subpq_cond, is->subpq_mutex); + } + SDL_UnlockMutex(is->subpq_mutex); + + if (is->subtitleq.abort_request) + goto the_end; + + sp = &is->subpq[is->subpq_windex]; + + /* NOTE: ipts is the PTS of the _first_ picture beginning in + this packet, if any */ + pts = 0; + if (pkt->pts != AV_NOPTS_VALUE) + pts = av_q2d(is->subtitle_st->time_base)*pkt->pts; + + SDL_LockMutex(is->subtitle_decoder_mutex); + len1 = avcodec_decode_subtitle(is->subtitle_st->codec, + &sp->sub, &got_subtitle, + pkt->data, pkt->size); + SDL_UnlockMutex(is->subtitle_decoder_mutex); +// if (len1 < 0) +// break; + if (got_subtitle && sp->sub.format == 0) { + sp->pts = pts; + + for (i = 0; i < sp->sub.num_rects; i++) + { + for (j = 0; j < sp->sub.rects[i].nb_colors; j++) + { + RGBA_IN(r, g, b, a, sp->sub.rects[i].rgba_palette + j); + y = RGB_TO_Y_CCIR(r, g, b); + u = RGB_TO_U_CCIR(r, g, b, 0); + v = RGB_TO_V_CCIR(r, g, b, 0); + YUVA_OUT(sp->sub.rects[i].rgba_palette + j, y, u, v, a); + } + } + + /* now we can update the picture count */ + if (++is->subpq_windex == SUBPICTURE_QUEUE_SIZE) + is->subpq_windex = 0; + SDL_LockMutex(is->subpq_mutex); + is->subpq_size++; + SDL_UnlockMutex(is->subpq_mutex); + } + av_free_packet(pkt); +// if (step) +// if (cur_stream) +// stream_pause(cur_stream); + } 
+ the_end: + return 0; +} + +/* copy samples for viewing in editor window */ +static void update_sample_display(VideoState *is, short *samples, int samples_size) +{ + int size, len, channels; + + channels = is->audio_st->codec->channels; + + size = samples_size / sizeof(short); + while (size > 0) { + len = SAMPLE_ARRAY_SIZE - is->sample_array_index; + if (len > size) + len = size; + memcpy(is->sample_array + is->sample_array_index, samples, len * sizeof(short)); + samples += len; + is->sample_array_index += len; + if (is->sample_array_index >= SAMPLE_ARRAY_SIZE) + is->sample_array_index = 0; + size -= len; + } +} + +/* return the new audio buffer size (samples can be added or deleted + to get better sync if video or external master clock) */ +static int synchronize_audio(VideoState *is, short *samples, + int samples_size1, double pts) +{ + int n, samples_size; + double ref_clock; + + n = 2 * is->audio_st->codec->channels; + samples_size = samples_size1; + + /* if not master, then we try to remove or add samples to correct the clock */ + if (((is->av_sync_type == AV_SYNC_VIDEO_MASTER && is->video_st) || + is->av_sync_type == AV_SYNC_EXTERNAL_CLOCK)) { + double diff, avg_diff; + int wanted_size, min_size, max_size, nb_samples; + + ref_clock = get_master_clock(is); + diff = get_audio_clock(is) - ref_clock; + + if (diff < AV_NOSYNC_THRESHOLD) { + is->audio_diff_cum = diff + is->audio_diff_avg_coef * is->audio_diff_cum; + if (is->audio_diff_avg_count < AUDIO_DIFF_AVG_NB) { + /* not enough measures to have a correct estimate */ + is->audio_diff_avg_count++; + } else { + /* estimate the A-V difference */ + avg_diff = is->audio_diff_cum * (1.0 - is->audio_diff_avg_coef); + + if (fabs(avg_diff) >= is->audio_diff_threshold) { + wanted_size = samples_size + ((int)(diff * is->audio_st->codec->sample_rate) * n); + nb_samples = samples_size / n; + + min_size = ((nb_samples * (100 - SAMPLE_CORRECTION_PERCENT_MAX)) / 100) * n; + max_size = ((nb_samples * (100 + 
SAMPLE_CORRECTION_PERCENT_MAX)) / 100) * n; + if (wanted_size < min_size) + wanted_size = min_size; + else if (wanted_size > max_size) + wanted_size = max_size; + + /* add or remove samples to correction the synchro */ + if (wanted_size < samples_size) { + /* remove samples */ + samples_size = wanted_size; + } else if (wanted_size > samples_size) { + uint8_t *samples_end, *q; + int nb; + + /* add samples */ + nb = (samples_size - wanted_size); + samples_end = (uint8_t *)samples + samples_size - n; + q = samples_end + n; + while (nb > 0) { + memcpy(q, samples_end, n); + q += n; + nb -= n; + } + samples_size = wanted_size; + } + } +#if 0 + printf("diff=%f adiff=%f sample_diff=%d apts=%0.3f vpts=%0.3f %f\n", + diff, avg_diff, samples_size - samples_size1, + is->audio_clock, is->video_clock, is->audio_diff_threshold); +#endif + } + } else { + /* too big difference : may be initial PTS errors, so + reset A-V filter */ + is->audio_diff_avg_count = 0; + is->audio_diff_cum = 0; + } + } + + return samples_size; +} + +/* decode one audio frame and returns its uncompressed size */ +static int audio_decode_frame(VideoState *is, uint8_t *audio_buf, double *pts_ptr) +{ + AVPacket *pkt = &is->audio_pkt; + int n, len1, data_size; + double pts; + + for(;;) { + /* NOTE: the audio packet can contain several frames */ + while (is->audio_pkt_size > 0) { + SDL_LockMutex(is->audio_decoder_mutex); + len1 = avcodec_decode_audio(is->audio_st->codec, + (int16_t *)audio_buf, &data_size, + is->audio_pkt_data, is->audio_pkt_size); + SDL_UnlockMutex(is->audio_decoder_mutex); + if (len1 < 0) { + /* if error, we skip the frame */ + is->audio_pkt_size = 0; + break; + } + + is->audio_pkt_data += len1; + is->audio_pkt_size -= len1; + if (data_size <= 0) + continue; + /* if no pts, then compute it */ + pts = is->audio_clock; + *pts_ptr = pts; + n = 2 * is->audio_st->codec->channels; + is->audio_clock += (double)data_size / + (double)(n * is->audio_st->codec->sample_rate); +#if defined(DEBUG_SYNC) + { 
+ static double last_clock; + printf("audio: delay=%0.3f clock=%0.3f pts=%0.3f\n", + is->audio_clock - last_clock, + is->audio_clock, pts); + last_clock = is->audio_clock; + } +#endif + return data_size; + } + + /* free the current packet */ + if (pkt->data) + av_free_packet(pkt); + + if (is->paused || is->audioq.abort_request) { + return -1; + } + + /* read next packet */ + if (packet_queue_get(&is->audioq, pkt, 1) < 0) + return -1; + is->audio_pkt_data = pkt->data; + is->audio_pkt_size = pkt->size; + + /* if update the audio clock with the pts */ + if (pkt->pts != AV_NOPTS_VALUE) { + is->audio_clock = av_q2d(is->audio_st->time_base)*pkt->pts; + } + } +} + +/* get the current audio output buffer size, in samples. With SDL, we + cannot have a precise information */ +static int audio_write_get_buf_size(VideoState *is) +{ + return is->audio_hw_buf_size - is->audio_buf_index; +} + + +/* prepare a new audio buffer */ +void sdl_audio_callback(void *opaque, Uint8 *stream, int len) +{ + VideoState *is = opaque; + int audio_size, len1; + double pts; + + audio_callback_time = av_gettime(); + + while (len > 0) { + if (is->audio_buf_index >= is->audio_buf_size) { + audio_size = audio_decode_frame(is, is->audio_buf, &pts); + if (audio_size < 0) { + /* if error, just output silence */ + is->audio_buf_size = 1024; + memset(is->audio_buf, 0, is->audio_buf_size); + } else { + if (is->show_audio) + update_sample_display(is, (int16_t *)is->audio_buf, audio_size); + audio_size = synchronize_audio(is, (int16_t *)is->audio_buf, audio_size, + pts); + is->audio_buf_size = audio_size; + } + is->audio_buf_index = 0; + } + len1 = is->audio_buf_size - is->audio_buf_index; + if (len1 > len) + len1 = len; + memcpy(stream, (uint8_t *)is->audio_buf + is->audio_buf_index, len1); + len -= len1; + stream += len1; + is->audio_buf_index += len1; + } +} + + +/* open a given stream. 
Return 0 if OK */ +static int stream_component_open(VideoState *is, int stream_index) +{ + AVFormatContext *ic = is->ic; + AVCodecContext *enc; + AVCodec *codec; + SDL_AudioSpec wanted_spec, spec; + + if (stream_index < 0 || stream_index >= ic->nb_streams) + return -1; + enc = ic->streams[stream_index]->codec; + + /* prepare audio output */ + if (enc->codec_type == CODEC_TYPE_AUDIO) { + wanted_spec.freq = enc->sample_rate; + wanted_spec.format = AUDIO_S16SYS; + /* hack for AC3. XXX: suppress that */ + if (enc->channels > 2) + enc->channels = 2; + wanted_spec.channels = enc->channels; + wanted_spec.silence = 0; + wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE; + wanted_spec.callback = sdl_audio_callback; + wanted_spec.userdata = is; + if (SDL_OpenAudio(&wanted_spec, &spec) < 0) { + fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError()); + return -1; + } + is->audio_hw_buf_size = spec.size; + } + + codec = avcodec_find_decoder(enc->codec_id); + enc->debug_mv = debug_mv; + enc->debug = debug; + if(debug) + av_log_set_level(AV_LOG_DEBUG); + enc->workaround_bugs = workaround_bugs; + enc->lowres = lowres; + if(lowres) enc->flags |= CODEC_FLAG_EMU_EDGE; + enc->idct_algo= idct; + if(fast) enc->flags2 |= CODEC_FLAG2_FAST; + enc->skip_frame= skip_frame; + enc->skip_idct= skip_idct; + enc->skip_loop_filter= skip_loop_filter; + enc->error_resilience= error_resilience; + enc->error_concealment= error_concealment; + if (!codec || + avcodec_open(enc, codec) < 0) + return -1; +#if defined(HAVE_THREADS) + if(thread_count>1) + avcodec_thread_init(enc, thread_count); +#endif + enc->thread_count= thread_count; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + is->audio_stream = stream_index; + is->audio_st = ic->streams[stream_index]; + is->audio_buf_size = 0; + is->audio_buf_index = 0; + + /* init averaging filter */ + is->audio_diff_avg_coef = exp(log(0.01) / AUDIO_DIFF_AVG_NB); + is->audio_diff_avg_count = 0; + /* since we do not have a precise anough audio fifo fullness, + we 
correct audio sync only if larger than this threshold */ + is->audio_diff_threshold = 2.0 * SDL_AUDIO_BUFFER_SIZE / enc->sample_rate; + + memset(&is->audio_pkt, 0, sizeof(is->audio_pkt)); + packet_queue_init(&is->audioq); + SDL_PauseAudio(0); + break; + case CODEC_TYPE_VIDEO: + is->video_stream = stream_index; + is->video_st = ic->streams[stream_index]; + + is->frame_last_delay = 40e-3; + is->frame_timer = (double)av_gettime() / 1000000.0; + is->video_current_pts_time = av_gettime(); + + packet_queue_init(&is->videoq); + is->video_tid = SDL_CreateThread(video_thread, is); + break; + case CODEC_TYPE_SUBTITLE: + is->subtitle_stream = stream_index; + is->subtitle_st = ic->streams[stream_index]; + packet_queue_init(&is->subtitleq); + + is->subtitle_tid = SDL_CreateThread(subtitle_thread, is); + break; + default: + break; + } + return 0; +} + +static void stream_component_close(VideoState *is, int stream_index) +{ + AVFormatContext *ic = is->ic; + AVCodecContext *enc; + + if (stream_index < 0 || stream_index >= ic->nb_streams) + return; + enc = ic->streams[stream_index]->codec; + + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + packet_queue_abort(&is->audioq); + + SDL_CloseAudio(); + + packet_queue_end(&is->audioq); + break; + case CODEC_TYPE_VIDEO: + packet_queue_abort(&is->videoq); + + /* note: we also signal this mutex to make sure we deblock the + video thread in all cases */ + SDL_LockMutex(is->pictq_mutex); + SDL_CondSignal(is->pictq_cond); + SDL_UnlockMutex(is->pictq_mutex); + + SDL_WaitThread(is->video_tid, NULL); + + packet_queue_end(&is->videoq); + break; + case CODEC_TYPE_SUBTITLE: + packet_queue_abort(&is->subtitleq); + + /* note: we also signal this mutex to make sure we deblock the + video thread in all cases */ + SDL_LockMutex(is->subpq_mutex); + is->subtitle_stream_changed = 1; + + SDL_CondSignal(is->subpq_cond); + SDL_UnlockMutex(is->subpq_mutex); + + SDL_WaitThread(is->subtitle_tid, NULL); + + packet_queue_end(&is->subtitleq); + break; + default: 
+ break; + } + + avcodec_close(enc); + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + is->audio_st = NULL; + is->audio_stream = -1; + break; + case CODEC_TYPE_VIDEO: + is->video_st = NULL; + is->video_stream = -1; + break; + case CODEC_TYPE_SUBTITLE: + is->subtitle_st = NULL; + is->subtitle_stream = -1; + break; + default: + break; + } +} + +void dump_stream_info(AVFormatContext *s) +{ + if (s->track != 0) + fprintf(stderr, "Track: %d\n", s->track); + if (s->title[0] != '\0') + fprintf(stderr, "Title: %s\n", s->title); + if (s->author[0] != '\0') + fprintf(stderr, "Author: %s\n", s->author); + if (s->album[0] != '\0') + fprintf(stderr, "Album: %s\n", s->album); + if (s->year != 0) + fprintf(stderr, "Year: %d\n", s->year); + if (s->genre[0] != '\0') + fprintf(stderr, "Genre: %s\n", s->genre); +} + +/* since we have only one decoding thread, we can use a global + variable instead of a thread local variable */ +static VideoState *global_video_state; + +static int decode_interrupt_cb(void) +{ + return (global_video_state && global_video_state->abort_request); +} + +/* this thread gets the stream from the disk or the network */ +static int decode_thread(void *arg) +{ + VideoState *is = arg; + AVFormatContext *ic; + int err, i, ret, video_index, audio_index, use_play; + AVPacket pkt1, *pkt = &pkt1; + AVFormatParameters params, *ap = ¶ms; + + video_index = -1; + audio_index = -1; + is->video_stream = -1; + is->audio_stream = -1; + is->subtitle_stream = -1; + + global_video_state = is; + url_set_interrupt_cb(decode_interrupt_cb); + + memset(ap, 0, sizeof(*ap)); + ap->image_format = image_format; + ap->initial_pause = 1; /* we force a pause when starting an RTSP + stream */ + + err = av_open_input_file(&ic, is->filename, is->iformat, 0, ap); + if (err < 0) { + print_error(is->filename, err); + ret = -1; + goto fail; + } + is->ic = ic; +#ifdef CONFIG_NETWORK + use_play = (ic->iformat == &rtsp_demux); +#else + use_play = 0; +#endif + + if(genpts) + ic->flags |= 
AVFMT_FLAG_GENPTS; + + if (!use_play) { + err = av_find_stream_info(ic); + if (err < 0) { + fprintf(stderr, "%s: could not find codec parameters\n", is->filename); + ret = -1; + goto fail; + } + ic->pb.eof_reached= 0; //FIXME hack, ffplay maybe shouldnt use url_feof() to test for the end + } + + /* if seeking requested, we execute it */ + if (start_time != AV_NOPTS_VALUE) { + int64_t timestamp; + + timestamp = start_time; + /* add the stream start time */ + if (ic->start_time != AV_NOPTS_VALUE) + timestamp += ic->start_time; + ret = av_seek_frame(ic, -1, timestamp, AVSEEK_FLAG_BACKWARD); + if (ret < 0) { + fprintf(stderr, "%s: could not seek to position %0.3f\n", + is->filename, (double)timestamp / AV_TIME_BASE); + } + } + + /* now we can begin to play (RTSP stream only) */ + av_read_play(ic); + + if (use_play) { + err = av_find_stream_info(ic); + if (err < 0) { + fprintf(stderr, "%s: could not find codec parameters\n", is->filename); + ret = -1; + goto fail; + } + } + + for(i = 0; i < ic->nb_streams; i++) { + AVCodecContext *enc = ic->streams[i]->codec; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: + if (audio_index < 0 && !audio_disable) + audio_index = i; + break; + case CODEC_TYPE_VIDEO: + if (video_index < 0 && !video_disable) + video_index = i; + break; + default: + break; + } + } + if (show_status) { + dump_format(ic, 0, is->filename, 0); + dump_stream_info(ic); + } + + /* open the streams */ + if (audio_index >= 0) { + stream_component_open(is, audio_index); + } + + if (video_index >= 0) { + stream_component_open(is, video_index); + } else { + if (!display_disable) + is->show_audio = 1; + } + + if (is->video_stream < 0 && is->audio_stream < 0) { + fprintf(stderr, "%s: could not open codecs\n", is->filename); + ret = -1; + goto fail; + } + + for(;;) { + if (is->abort_request) + break; +#ifdef CONFIG_NETWORK + if (is->paused != is->last_paused) { + is->last_paused = is->paused; + if (is->paused) + av_read_pause(ic); + else + av_read_play(ic); + } + if 
(is->paused && ic->iformat == &rtsp_demux) { + /* wait 10 ms to avoid trying to get another packet */ + /* XXX: horrible */ + SDL_Delay(10); + continue; + } +#endif + if (is->seek_req) { + /* XXX: must lock decoder threads */ + SDL_LockMutex(is->video_decoder_mutex); + SDL_LockMutex(is->audio_decoder_mutex); + SDL_LockMutex(is->subtitle_decoder_mutex); + ret = av_seek_frame(is->ic, -1, is->seek_pos, is->seek_flags); + if (ret < 0) { + fprintf(stderr, "%s: error while seeking\n", is->ic->filename); + }else{ + if (is->audio_stream >= 0) { + packet_queue_flush(&is->audioq); + } + if (is->subtitle_stream >= 0) { + packet_queue_flush(&is->subtitleq); + } + if (is->video_stream >= 0) { + packet_queue_flush(&is->videoq); + avcodec_flush_buffers(ic->streams[video_index]->codec); + } + } + SDL_UnlockMutex(is->subtitle_decoder_mutex); + SDL_UnlockMutex(is->audio_decoder_mutex); + SDL_UnlockMutex(is->video_decoder_mutex); + is->seek_req = 0; + } + + /* if the queue are full, no need to read more */ + if (is->audioq.size > MAX_AUDIOQ_SIZE || + is->videoq.size > MAX_VIDEOQ_SIZE || + is->subtitleq.size > MAX_SUBTITLEQ_SIZE || + url_feof(&ic->pb)) { + /* wait 10 ms */ + SDL_Delay(10); + continue; + } + ret = av_read_frame(ic, pkt); + if (ret < 0) { + if (url_ferror(&ic->pb) == 0) { + SDL_Delay(100); /* wait for user event */ + continue; + } else + break; + } + if (pkt->stream_index == is->audio_stream) { + packet_queue_put(&is->audioq, pkt); + } else if (pkt->stream_index == is->video_stream) { + packet_queue_put(&is->videoq, pkt); + } else if (pkt->stream_index == is->subtitle_stream) { + packet_queue_put(&is->subtitleq, pkt); + } else { + av_free_packet(pkt); + } + } + /* wait until the end */ + while (!is->abort_request) { + SDL_Delay(100); + } + + ret = 0; + fail: + /* disable interrupting */ + global_video_state = NULL; + + /* close each stream */ + if (is->audio_stream >= 0) + stream_component_close(is, is->audio_stream); + if (is->video_stream >= 0) + 
stream_component_close(is, is->video_stream); + if (is->subtitle_stream >= 0) + stream_component_close(is, is->subtitle_stream); + if (is->ic) { + av_close_input_file(is->ic); + is->ic = NULL; /* safety */ + } + url_set_interrupt_cb(NULL); + + if (ret != 0) { + SDL_Event event; + + event.type = FF_QUIT_EVENT; + event.user.data1 = is; + SDL_PushEvent(&event); + } + return 0; +} + +static VideoState *stream_open(const char *filename, AVInputFormat *iformat) +{ + VideoState *is; + + is = av_mallocz(sizeof(VideoState)); + if (!is) + return NULL; + pstrcpy(is->filename, sizeof(is->filename), filename); + is->iformat = iformat; + if (screen) { + is->width = screen->w; + is->height = screen->h; + } + is->ytop = 0; + is->xleft = 0; + + /* start video display */ + is->pictq_mutex = SDL_CreateMutex(); + is->pictq_cond = SDL_CreateCond(); + + is->subpq_mutex = SDL_CreateMutex(); + is->subpq_cond = SDL_CreateCond(); + + is->subtitle_decoder_mutex = SDL_CreateMutex(); + is->audio_decoder_mutex = SDL_CreateMutex(); + is->video_decoder_mutex = SDL_CreateMutex(); + + /* add the refresh timer to draw the picture */ + schedule_refresh(is, 40); + + is->av_sync_type = av_sync_type; + is->parse_tid = SDL_CreateThread(decode_thread, is); + if (!is->parse_tid) { + av_free(is); + return NULL; + } + return is; +} + +static void stream_close(VideoState *is) +{ + VideoPicture *vp; + int i; + /* XXX: use a special url_shutdown call to abort parse cleanly */ + is->abort_request = 1; + SDL_WaitThread(is->parse_tid, NULL); + + /* free all pictures */ + for(i=0;ipictq[i]; + if (vp->bmp) { + SDL_FreeYUVOverlay(vp->bmp); + vp->bmp = NULL; + } + } + SDL_DestroyMutex(is->pictq_mutex); + SDL_DestroyCond(is->pictq_cond); + SDL_DestroyMutex(is->subpq_mutex); + SDL_DestroyCond(is->subpq_cond); + SDL_DestroyMutex(is->subtitle_decoder_mutex); + SDL_DestroyMutex(is->audio_decoder_mutex); + SDL_DestroyMutex(is->video_decoder_mutex); +} + +void stream_cycle_channel(VideoState *is, int codec_type) +{ + 
AVFormatContext *ic = is->ic; + int start_index, stream_index; + AVStream *st; + + if (codec_type == CODEC_TYPE_VIDEO) + start_index = is->video_stream; + else if (codec_type == CODEC_TYPE_AUDIO) + start_index = is->audio_stream; + else + start_index = is->subtitle_stream; + if (start_index < (codec_type == CODEC_TYPE_SUBTITLE ? -1 : 0)) + return; + stream_index = start_index; + for(;;) { + if (++stream_index >= is->ic->nb_streams) + { + if (codec_type == CODEC_TYPE_SUBTITLE) + { + stream_index = -1; + goto the_end; + } else + stream_index = 0; + } + if (stream_index == start_index) + return; + st = ic->streams[stream_index]; + if (st->codec->codec_type == codec_type) { + /* check that parameters are OK */ + switch(codec_type) { + case CODEC_TYPE_AUDIO: + if (st->codec->sample_rate != 0 && + st->codec->channels != 0) + goto the_end; + break; + case CODEC_TYPE_VIDEO: + case CODEC_TYPE_SUBTITLE: + goto the_end; + default: + break; + } + } + } + the_end: + stream_component_close(is, start_index); + stream_component_open(is, stream_index); +} + + +void toggle_full_screen(void) +{ + int w, h, flags; + is_full_screen = !is_full_screen; + if (!fs_screen_width) { + /* use default SDL method */ + SDL_WM_ToggleFullScreen(screen); + } else { + /* use the recorded resolution */ + flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL; + if (is_full_screen) { + w = fs_screen_width; + h = fs_screen_height; + flags |= SDL_FULLSCREEN; + } else { + w = screen_width; + h = screen_height; + flags |= SDL_RESIZABLE; + } + screen = SDL_SetVideoMode(w, h, 0, flags); + cur_stream->width = w; + cur_stream->height = h; + } +} + +void toggle_pause(void) +{ + if (cur_stream) + stream_pause(cur_stream); + step = 0; +} + +void step_to_next_frame(void) +{ + if (cur_stream) { + if (cur_stream->paused) + cur_stream->paused=0; + cur_stream->video_current_pts = get_video_clock(cur_stream); + } + step = 1; +} + +void do_exit(void) +{ + if (cur_stream) { + stream_close(cur_stream); + cur_stream = NULL; + } + 
if (show_status) + printf("\n"); + SDL_Quit(); + exit(0); +} + +void toggle_audio_display(void) +{ + if (cur_stream) { + cur_stream->show_audio = !cur_stream->show_audio; + } +} + +/* handle an event sent by the GUI */ +void event_loop(void) +{ + SDL_Event event; + double incr, pos, frac; + + for(;;) { + SDL_WaitEvent(&event); + switch(event.type) { + case SDL_KEYDOWN: + switch(event.key.keysym.sym) { + case SDLK_ESCAPE: + case SDLK_q: + do_exit(); + break; + case SDLK_f: + toggle_full_screen(); + break; + case SDLK_p: + case SDLK_SPACE: + toggle_pause(); + break; + case SDLK_s: //S: Step to next frame + step_to_next_frame(); + break; + case SDLK_a: + if (cur_stream) + stream_cycle_channel(cur_stream, CODEC_TYPE_AUDIO); + break; + case SDLK_v: + if (cur_stream) + stream_cycle_channel(cur_stream, CODEC_TYPE_VIDEO); + break; + case SDLK_t: + if (cur_stream) + stream_cycle_channel(cur_stream, CODEC_TYPE_SUBTITLE); + break; + case SDLK_w: + toggle_audio_display(); + break; + case SDLK_LEFT: + incr = -10.0; + goto do_seek; + case SDLK_RIGHT: + incr = 10.0; + goto do_seek; + case SDLK_UP: + incr = 60.0; + goto do_seek; + case SDLK_DOWN: + incr = -60.0; + do_seek: + if (cur_stream) { + pos = get_master_clock(cur_stream); + pos += incr; + stream_seek(cur_stream, (int64_t)(pos * AV_TIME_BASE), incr); + } + break; + default: + break; + } + break; + case SDL_MOUSEBUTTONDOWN: + if (cur_stream) { + int ns, hh, mm, ss; + int tns, thh, tmm, tss; + tns = cur_stream->ic->duration/1000000LL; + thh = tns/3600; + tmm = (tns%3600)/60; + tss = (tns%60); + frac = (double)event.button.x/(double)cur_stream->width; + ns = frac*tns; + hh = ns/3600; + mm = (ns%3600)/60; + ss = (ns%60); + fprintf(stderr, "Seek to %2.0f%% (%2d:%02d:%02d) of total duration (%2d:%02d:%02d) \n", frac*100, + hh, mm, ss, thh, tmm, tss); + stream_seek(cur_stream, (int64_t)(cur_stream->ic->start_time+frac*cur_stream->ic->duration), 0); + } + break; + case SDL_VIDEORESIZE: + if (cur_stream) { + screen = 
SDL_SetVideoMode(event.resize.w, event.resize.h, 0, + SDL_HWSURFACE|SDL_RESIZABLE|SDL_ASYNCBLIT|SDL_HWACCEL); + cur_stream->width = event.resize.w; + cur_stream->height = event.resize.h; + } + break; + case SDL_QUIT: + case FF_QUIT_EVENT: + do_exit(); + break; + case FF_ALLOC_EVENT: + alloc_picture(event.user.data1); + break; + case FF_REFRESH_EVENT: + video_refresh_timer(event.user.data1); + break; + default: + break; + } + } +} + +void opt_width(const char *arg) +{ + screen_width = atoi(arg); +} + +void opt_height(const char *arg) +{ + screen_height = atoi(arg); +} + +static void opt_format(const char *arg) +{ + file_iformat = av_find_input_format(arg); + if (!file_iformat) { + fprintf(stderr, "Unknown input format: %s\n", arg); + exit(1); + } +} + +static void opt_image_format(const char *arg) +{ + AVImageFormat *f; + + for(f = first_image_format; f != NULL; f = f->next) { + if (!strcmp(arg, f->name)) + break; + } + if (!f) { + fprintf(stderr, "Unknown image format: '%s'\n", arg); + exit(1); + } + image_format = f; +} + +#ifdef CONFIG_NETWORK +void opt_rtp_tcp(void) +{ + /* only tcp protocol */ + rtsp_default_protocols = (1 << RTSP_PROTOCOL_RTP_TCP); +} +#endif + +void opt_sync(const char *arg) +{ + if (!strcmp(arg, "audio")) + av_sync_type = AV_SYNC_AUDIO_MASTER; + else if (!strcmp(arg, "video")) + av_sync_type = AV_SYNC_VIDEO_MASTER; + else if (!strcmp(arg, "ext")) + av_sync_type = AV_SYNC_EXTERNAL_CLOCK; + else + show_help(); +} + +void opt_seek(const char *arg) +{ + start_time = parse_date(arg, 1); +} + +static void opt_debug(const char *arg) +{ + debug = atoi(arg); +} + +static void opt_vismv(const char *arg) +{ + debug_mv = atoi(arg); +} + +static void opt_thread_count(const char *arg) +{ + thread_count= atoi(arg); +#if !defined(HAVE_THREADS) + fprintf(stderr, "Warning: not compiled with thread support, using thread emulation\n"); +#endif +} + +const OptionDef options[] = { + { "h", 0, {(void*)show_help}, "show help" }, + { "x", HAS_ARG, 
{(void*)opt_width}, "force displayed width", "width" }, + { "y", HAS_ARG, {(void*)opt_height}, "force displayed height", "height" }, +#if 0 + /* disabled as SDL/X11 does not support it correctly on application launch */ + { "fs", OPT_BOOL, {(void*)&is_full_screen}, "force full screen" }, +#endif + { "an", OPT_BOOL, {(void*)&audio_disable}, "disable audio" }, + { "vn", OPT_BOOL, {(void*)&video_disable}, "disable video" }, + { "ss", HAS_ARG, {(void*)&opt_seek}, "seek to a given position in seconds", "pos" }, + { "nodisp", OPT_BOOL, {(void*)&display_disable}, "disable graphical display" }, + { "f", HAS_ARG, {(void*)opt_format}, "force format", "fmt" }, + { "img", HAS_ARG, {(void*)opt_image_format}, "force image format", "img_fmt" }, + { "stats", OPT_BOOL | OPT_EXPERT, {(void*)&show_status}, "show status", "" }, + { "debug", HAS_ARG | OPT_EXPERT, {(void*)opt_debug}, "print specific debug info", "" }, + { "bug", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&workaround_bugs}, "workaround bugs", "" }, + { "vismv", HAS_ARG | OPT_EXPERT, {(void*)opt_vismv}, "visualize motion vectors", "" }, + { "fast", OPT_BOOL | OPT_EXPERT, {(void*)&fast}, "non spec compliant optimizations", "" }, + { "genpts", OPT_BOOL | OPT_EXPERT, {(void*)&genpts}, "generate pts", "" }, + { "lowres", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&lowres}, "", "" }, + { "skiploop", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&skip_loop_filter}, "", "" }, + { "skipframe", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&skip_frame}, "", "" }, + { "skipidct", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&skip_idct}, "", "" }, + { "idct", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&idct}, "set idct algo", "algo" }, + { "er", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_resilience}, "set error detection threshold (0-4)", "threshold" }, + { "ec", OPT_INT | HAS_ARG | OPT_EXPERT, {(void*)&error_concealment}, "set error concealment options", "bit_mask" }, +#ifdef CONFIG_NETWORK + { "rtp_tcp", OPT_EXPERT, {(void*)&opt_rtp_tcp}, "force 
RTP/TCP protocol usage", "" }, +#endif + { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, + { NULL, }, +}; + +void show_help(void) +{ + printf("ffplay version " FFMPEG_VERSION ", Copyright (c) 2003 Fabrice Bellard\n" + "usage: ffplay [options] input_file\n" + "Simple media player\n"); + printf("\n"); + show_help_options(options, "Main options:\n", + OPT_EXPERT, 0); + show_help_options(options, "\nAdvanced options:\n", + OPT_EXPERT, OPT_EXPERT); + printf("\nWhile playing:\n" + "q, ESC quit\n" + "f toggle full screen\n" + "p, SPC pause\n" + "a cycle audio channel\n" + "v cycle video channel\n" + "t cycle subtitle channel\n" + "w show audio waves\n" + "left/right seek backward/forward 10 seconds\n" + "down/up seek backward/forward 1 minute\n" + "mouse click seek to percentage in file corresponding to fraction of width\n" + ); + exit(1); +} + +void parse_arg_file(const char *filename) +{ + if (!strcmp(filename, "-")) + filename = "pipe:"; + input_filename = filename; +} + +/* Called from the main */ +int main(int argc, char **argv) +{ + int flags, w, h; + + /* register all codecs, demux and protocols */ + av_register_all(); + + #ifdef CONFIG_OS2 + MorphToPM(); // Morph the VIO application to a PM one to be able to use Win* functions + + // Make stdout and stderr unbuffered + setbuf( stdout, NULL ); + setbuf( stderr, NULL ); + #endif + + parse_options(argc, argv, options); + + if (!input_filename) + show_help(); + + if (display_disable) { + video_disable = 1; + } + flags = SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER; +#ifndef CONFIG_WIN32 + flags |= SDL_INIT_EVENTTHREAD; /* Not supported on win32 */ +#endif + if (SDL_Init (flags)) { + fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError()); + exit(1); + } + + if (!display_disable) { +#ifdef HAVE_X11 + /* save the screen resolution... 
SDL should allow full screen + by resizing the window */ + { + Display *dpy; + dpy = XOpenDisplay(NULL); + if (dpy) { + fs_screen_width = DisplayWidth(dpy, DefaultScreen(dpy)); + fs_screen_height = DisplayHeight(dpy, DefaultScreen(dpy)); + XCloseDisplay(dpy); + } + } +#endif + flags = SDL_HWSURFACE|SDL_ASYNCBLIT|SDL_HWACCEL; + if (is_full_screen && fs_screen_width) { + w = fs_screen_width; + h = fs_screen_height; + flags |= SDL_FULLSCREEN; + } else { + w = screen_width; + h = screen_height; + flags |= SDL_RESIZABLE; + } + screen = SDL_SetVideoMode(w, h, 0, flags); + if (!screen) { + fprintf(stderr, "SDL: could not set video mode - exiting\n"); + exit(1); + } + SDL_WM_SetCaption("FFplay", "FFplay"); + } + + SDL_EventState(SDL_ACTIVEEVENT, SDL_IGNORE); + SDL_EventState(SDL_MOUSEMOTION, SDL_IGNORE); + SDL_EventState(SDL_SYSWMEVENT, SDL_IGNORE); + SDL_EventState(SDL_USEREVENT, SDL_IGNORE); + + cur_stream = stream_open(input_filename, file_iformat); + + event_loop(); + + /* never returns */ + + return 0; +} diff --git a/mpeg4/src/ffserver.c b/mpeg4/src/ffserver.c new file mode 100644 index 0000000000000000000000000000000000000000..e31a02706c21df741c5709f44ab678ece2b1d539 --- /dev/null +++ b/mpeg4/src/ffserver.c @@ -0,0 +1,4599 @@ +/* + * Multiple format streaming server + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/* State of one connection; stored in the state field of HTTPContext.
   The HTTPSTATE_ values come first, followed by the RTSPSTATE_ values.
   The http_state[] string table defined right after this enum lists one
   name per enumerator in the same order, so a change here needs the
   matching change there (presumably the table is indexed by these
   values -- confirm at the use sites). */
enum HTTPState {
    HTTPSTATE_WAIT_REQUEST,
    HTTPSTATE_SEND_HEADER,
    HTTPSTATE_SEND_DATA_HEADER,
    HTTPSTATE_SEND_DATA,          /* sending TCP or UDP data */
    HTTPSTATE_SEND_DATA_TRAILER,
    HTTPSTATE_RECEIVE_DATA,
    HTTPSTATE_WAIT_FEED,          /* wait for data from the feed */
    HTTPSTATE_READY,

    RTSPSTATE_WAIT_REQUEST,
    RTSPSTATE_SEND_REPLY,
    RTSPSTATE_SEND_PACKET,
};
int got_key_frame; /* stream 0 => 1, stream 1 => 2, stream 2=> 4 */ + int64_t data_count; + /* feed input */ + int feed_fd; + /* input format handling */ + AVFormatContext *fmt_in; + long start_time; /* In milliseconds - this wraps fairly often */ + int64_t first_pts; /* initial pts value */ + int64_t cur_pts; /* current pts value from the stream in us */ + int64_t cur_frame_duration; /* duration of the current frame in us */ + int cur_frame_bytes; /* output frame size, needed to compute + the time at which we send each + packet */ + int pts_stream_index; /* stream we choose as clock reference */ + int64_t cur_clock; /* current clock reference value in us */ + /* output format handling */ + struct FFStream *stream; + /* -1 is invalid stream */ + int feed_streams[MAX_STREAMS]; /* index of streams in the feed */ + int switch_feed_streams[MAX_STREAMS]; /* index of streams in the feed */ + int switch_pending; + AVFormatContext fmt_ctx; /* instance of FFStream for one user */ + int last_packet_sent; /* true if last data packet was sent */ + int suppress_log; + DataRateData datarate; + int wmp_client_id; + char protocol[16]; + char method[16]; + char url[128]; + int buffer_size; + uint8_t *buffer; + int is_packetized; /* if true, the stream is packetized */ + int packet_stream_index; /* current stream for output in state machine */ + + /* RTSP state specific */ + uint8_t *pb_buffer; /* XXX: use that in all the code */ + ByteIOContext *pb; + int seq; /* RTSP sequence number */ + + /* RTP state specific */ + enum RTSPProtocol rtp_protocol; + char session_id[32]; /* session id */ + AVFormatContext *rtp_ctx[MAX_STREAMS]; + + /* RTP/UDP specific */ + URLContext *rtp_handles[MAX_STREAMS]; + + /* RTP/TCP specific */ + struct HTTPContext *rtsp_c; + uint8_t *packet_buffer, *packet_buffer_ptr, *packet_buffer_end; +} HTTPContext; + +static AVFrame dummy_frame; + +/* each generated stream is described here */ +enum StreamType { + STREAM_TYPE_LIVE, + STREAM_TYPE_STATUS, + 
STREAM_TYPE_REDIRECT, +}; + +enum IPAddressAction { + IP_ALLOW = 1, + IP_DENY, +}; + +typedef struct IPAddressACL { + struct IPAddressACL *next; + enum IPAddressAction action; + /* These are in host order */ + struct in_addr first; + struct in_addr last; +} IPAddressACL; + +/* description of each stream of the ffserver.conf file */ +typedef struct FFStream { + enum StreamType stream_type; + char filename[1024]; /* stream filename */ + struct FFStream *feed; /* feed we are using (can be null if + coming from file) */ + AVFormatParameters *ap_in; /* input parameters */ + AVInputFormat *ifmt; /* if non NULL, force input format */ + AVOutputFormat *fmt; + IPAddressACL *acl; + int nb_streams; + int prebuffer; /* Number of millseconds early to start */ + long max_time; /* Number of milliseconds to run */ + int send_on_key; + AVStream *streams[MAX_STREAMS]; + int feed_streams[MAX_STREAMS]; /* index of streams in the feed */ + char feed_filename[1024]; /* file name of the feed storage, or + input file name for a stream */ + char author[512]; + char title[512]; + char copyright[512]; + char comment[512]; + pid_t pid; /* Of ffmpeg process */ + time_t pid_start; /* Of ffmpeg process */ + char **child_argv; + struct FFStream *next; + int bandwidth; /* bandwidth, in kbits/s */ + /* RTSP options */ + char *rtsp_option; + /* multicast specific */ + int is_multicast; + struct in_addr multicast_ip; + int multicast_port; /* first port used for multicast */ + int multicast_ttl; + int loop; /* if true, send the stream in loops (only meaningful if file) */ + + /* feed specific */ + int feed_opened; /* true if someone is writing to the feed */ + int is_feed; /* true if it is a feed */ + int readonly; /* True if writing is prohibited to the file */ + int conns_served; + int64_t bytes_served; + int64_t feed_max_size; /* maximum storage size, zero means unlimited */ + int64_t feed_write_index; /* current write position in feed (it wraps round) */ + int64_t feed_size; /* current size of 
feed */ + struct FFStream *next_feed; +} FFStream; + +typedef struct FeedData { + long long data_count; + float avg_frame_size; /* frame size averraged over last frames with exponential mean */ +} FeedData; + +struct sockaddr_in my_http_addr; +struct sockaddr_in my_rtsp_addr; + +static char logfilename[1024]; +static HTTPContext *first_http_ctx; +static FFStream *first_feed; /* contains only feeds */ +static FFStream *first_stream; /* contains all streams, including feeds */ + +static void new_connection(int server_fd, int is_rtsp); +static void close_connection(HTTPContext *c); + +/* HTTP handling */ +static int handle_connection(HTTPContext *c); +static int http_parse_request(HTTPContext *c); +static int http_send_data(HTTPContext *c); +static void compute_stats(HTTPContext *c); +static int open_input_stream(HTTPContext *c, const char *info); +static int http_start_receive_data(HTTPContext *c); +static int http_receive_data(HTTPContext *c); + +/* RTSP handling */ +static int rtsp_parse_request(HTTPContext *c); +static void rtsp_cmd_describe(HTTPContext *c, const char *url); +static void rtsp_cmd_options(HTTPContext *c, const char *url); +static void rtsp_cmd_setup(HTTPContext *c, const char *url, RTSPHeader *h); +static void rtsp_cmd_play(HTTPContext *c, const char *url, RTSPHeader *h); +static void rtsp_cmd_pause(HTTPContext *c, const char *url, RTSPHeader *h); +static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPHeader *h); + +/* SDP handling */ +static int prepare_sdp_description(FFStream *stream, uint8_t **pbuffer, + struct in_addr my_ip); + +/* RTP handling */ +static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr, + FFStream *stream, const char *session_id, + enum RTSPProtocol rtp_protocol); +static int rtp_new_av_stream(HTTPContext *c, + int stream_index, struct sockaddr_in *dest_addr, + HTTPContext *rtsp_c); + +static const char *my_program_name; +static const char *my_program_dir; + +static int ffserver_debug; +static int 
ffserver_daemon; +static int no_launch; +static int need_to_start_children; + +static int nb_max_connections; +static int nb_connections; + +static int max_bandwidth; +static int current_bandwidth; + +static long cur_time; // Making this global saves on passing it around everywhere + +static long gettime_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv,NULL); + return (long long)tv.tv_sec * 1000 + (tv.tv_usec / 1000); +} + +static FILE *logfile = NULL; + +static void __attribute__ ((format (printf, 1, 2))) http_log(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + if (logfile) { + vfprintf(logfile, fmt, ap); + fflush(logfile); + } + va_end(ap); +} + +static char *ctime1(char *buf2) +{ + time_t ti; + char *p; + + ti = time(NULL); + p = ctime(&ti); + strcpy(buf2, p); + p = buf2 + strlen(p) - 1; + if (*p == '\n') + *p = '\0'; + return buf2; +} + +static void log_connection(HTTPContext *c) +{ + char buf2[32]; + + if (c->suppress_log) + return; + + http_log("%s - - [%s] \"%s %s %s\" %d %"PRId64"\n", + inet_ntoa(c->from_addr.sin_addr), + ctime1(buf2), c->method, c->url, + c->protocol, (c->http_error ? 
c->http_error : 200), c->data_count); +} + +static void update_datarate(DataRateData *drd, int64_t count) +{ + if (!drd->time1 && !drd->count1) { + drd->time1 = drd->time2 = cur_time; + drd->count1 = drd->count2 = count; + } else { + if (cur_time - drd->time2 > 5000) { + drd->time1 = drd->time2; + drd->count1 = drd->count2; + drd->time2 = cur_time; + drd->count2 = count; + } + } +} + +/* In bytes per second */ +static int compute_datarate(DataRateData *drd, int64_t count) +{ + if (cur_time == drd->time1) + return 0; + + return ((count - drd->count1) * 1000) / (cur_time - drd->time1); +} + + +static void start_children(FFStream *feed) +{ + if (no_launch) + return; + + for (; feed; feed = feed->next) { + if (feed->child_argv && !feed->pid) { + feed->pid_start = time(0); + + feed->pid = fork(); + + if (feed->pid < 0) { + fprintf(stderr, "Unable to create children\n"); + exit(1); + } + if (!feed->pid) { + /* In child */ + char pathname[1024]; + char *slash; + int i; + + for (i = 3; i < 256; i++) { + close(i); + } + + if (!ffserver_debug) { + i = open("/dev/null", O_RDWR); + if (i) + dup2(i, 0); + dup2(i, 1); + dup2(i, 2); + if (i) + close(i); + } + + pstrcpy(pathname, sizeof(pathname), my_program_name); + + slash = strrchr(pathname, '/'); + if (!slash) { + slash = pathname; + } else { + slash++; + } + strcpy(slash, "ffmpeg"); + + /* This is needed to make relative pathnames work */ + chdir(my_program_dir); + + signal(SIGPIPE, SIG_DFL); + + execvp(pathname, feed->child_argv); + + _exit(1); + } + } + } +} + +/* open a listening socket */ +static int socket_open_listen(struct sockaddr_in *my_addr) +{ + int server_fd, tmp; + + server_fd = socket(AF_INET,SOCK_STREAM,0); + if (server_fd < 0) { + perror ("socket"); + return -1; + } + + tmp = 1; + setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &tmp, sizeof(tmp)); + + if (bind (server_fd, (struct sockaddr *) my_addr, sizeof (*my_addr)) < 0) { + char bindmsg[32]; + snprintf(bindmsg, sizeof(bindmsg), "bind(port %d)", 
ntohs(my_addr->sin_port)); + perror (bindmsg); + close(server_fd); + return -1; + } + + if (listen (server_fd, 5) < 0) { + perror ("listen"); + close(server_fd); + return -1; + } + fcntl(server_fd, F_SETFL, O_NONBLOCK); + + return server_fd; +} + +/* start all multicast streams */ +static void start_multicast(void) +{ + FFStream *stream; + char session_id[32]; + HTTPContext *rtp_c; + struct sockaddr_in dest_addr; + int default_port, stream_index; + + default_port = 6000; + for(stream = first_stream; stream != NULL; stream = stream->next) { + if (stream->is_multicast) { + /* open the RTP connection */ + snprintf(session_id, sizeof(session_id), + "%08x%08x", (int)random(), (int)random()); + + /* choose a port if none given */ + if (stream->multicast_port == 0) { + stream->multicast_port = default_port; + default_port += 100; + } + + dest_addr.sin_family = AF_INET; + dest_addr.sin_addr = stream->multicast_ip; + dest_addr.sin_port = htons(stream->multicast_port); + + rtp_c = rtp_new_connection(&dest_addr, stream, session_id, + RTSP_PROTOCOL_RTP_UDP_MULTICAST); + if (!rtp_c) { + continue; + } + if (open_input_stream(rtp_c, "") < 0) { + fprintf(stderr, "Could not open input stream for stream '%s'\n", + stream->filename); + continue; + } + + /* open each RTP stream */ + for(stream_index = 0; stream_index < stream->nb_streams; + stream_index++) { + dest_addr.sin_port = htons(stream->multicast_port + + 2 * stream_index); + if (rtp_new_av_stream(rtp_c, stream_index, &dest_addr, NULL) < 0) { + fprintf(stderr, "Could not open output stream '%s/streamid=%d'\n", + stream->filename, stream_index); + exit(1); + } + } + + /* change state to send data */ + rtp_c->state = HTTPSTATE_SEND_DATA; + } + } +} + +/* main loop of the http server */ +static int http_server(void) +{ + int server_fd, ret, rtsp_server_fd, delay, delay1; + struct pollfd poll_table[HTTP_MAX_CONNECTIONS + 2], *poll_entry; + HTTPContext *c, *c_next; + + server_fd = socket_open_listen(&my_http_addr); + if (server_fd 
< 0) + return -1; + + rtsp_server_fd = socket_open_listen(&my_rtsp_addr); + if (rtsp_server_fd < 0) + return -1; + + http_log("ffserver started.\n"); + + start_children(first_feed); + + first_http_ctx = NULL; + nb_connections = 0; + + start_multicast(); + + for(;;) { + poll_entry = poll_table; + poll_entry->fd = server_fd; + poll_entry->events = POLLIN; + poll_entry++; + + poll_entry->fd = rtsp_server_fd; + poll_entry->events = POLLIN; + poll_entry++; + + /* wait for events on each HTTP handle */ + c = first_http_ctx; + delay = 1000; + while (c != NULL) { + int fd; + fd = c->fd; + switch(c->state) { + case HTTPSTATE_SEND_HEADER: + case RTSPSTATE_SEND_REPLY: + case RTSPSTATE_SEND_PACKET: + c->poll_entry = poll_entry; + poll_entry->fd = fd; + poll_entry->events = POLLOUT; + poll_entry++; + break; + case HTTPSTATE_SEND_DATA_HEADER: + case HTTPSTATE_SEND_DATA: + case HTTPSTATE_SEND_DATA_TRAILER: + if (!c->is_packetized) { + /* for TCP, we output as much as we can (may need to put a limit) */ + c->poll_entry = poll_entry; + poll_entry->fd = fd; + poll_entry->events = POLLOUT; + poll_entry++; + } else { + /* when ffserver is doing the timing, we work by + looking at which packet need to be sent every + 10 ms */ + delay1 = 10; /* one tick wait XXX: 10 ms assumed */ + if (delay1 < delay) + delay = delay1; + } + break; + case HTTPSTATE_WAIT_REQUEST: + case HTTPSTATE_RECEIVE_DATA: + case HTTPSTATE_WAIT_FEED: + case RTSPSTATE_WAIT_REQUEST: + /* need to catch errors */ + c->poll_entry = poll_entry; + poll_entry->fd = fd; + poll_entry->events = POLLIN;/* Maybe this will work */ + poll_entry++; + break; + default: + c->poll_entry = NULL; + break; + } + c = c->next; + } + + /* wait for an event on one connection. 
We poll at least every + second to handle timeouts */ + do { + ret = poll(poll_table, poll_entry - poll_table, delay); + if (ret < 0 && errno != EAGAIN && errno != EINTR) + return -1; + } while (ret <= 0); + + cur_time = gettime_ms(); + + if (need_to_start_children) { + need_to_start_children = 0; + start_children(first_feed); + } + + /* now handle the events */ + for(c = first_http_ctx; c != NULL; c = c_next) { + c_next = c->next; + if (handle_connection(c) < 0) { + /* close and free the connection */ + log_connection(c); + close_connection(c); + } + } + + poll_entry = poll_table; + /* new HTTP connection request ? */ + if (poll_entry->revents & POLLIN) { + new_connection(server_fd, 0); + } + poll_entry++; + /* new RTSP connection request ? */ + if (poll_entry->revents & POLLIN) { + new_connection(rtsp_server_fd, 1); + } + } +} + +/* start waiting for a new HTTP/RTSP request */ +static void start_wait_request(HTTPContext *c, int is_rtsp) +{ + c->buffer_ptr = c->buffer; + c->buffer_end = c->buffer + c->buffer_size - 1; /* leave room for '\0' */ + + if (is_rtsp) { + c->timeout = cur_time + RTSP_REQUEST_TIMEOUT; + c->state = RTSPSTATE_WAIT_REQUEST; + } else { + c->timeout = cur_time + HTTP_REQUEST_TIMEOUT; + c->state = HTTPSTATE_WAIT_REQUEST; + } +} + +static void new_connection(int server_fd, int is_rtsp) +{ + struct sockaddr_in from_addr; + int fd, len; + HTTPContext *c = NULL; + + len = sizeof(from_addr); + fd = accept(server_fd, (struct sockaddr *)&from_addr, + &len); + if (fd < 0) + return; + fcntl(fd, F_SETFL, O_NONBLOCK); + + /* XXX: should output a warning page when coming + close to the connection limit */ + if (nb_connections >= nb_max_connections) + goto fail; + + /* add a new connection */ + c = av_mallocz(sizeof(HTTPContext)); + if (!c) + goto fail; + + c->fd = fd; + c->poll_entry = NULL; + c->from_addr = from_addr; + c->buffer_size = IOBUFFER_INIT_SIZE; + c->buffer = av_malloc(c->buffer_size); + if (!c->buffer) + goto fail; + + c->next = first_http_ctx; 
+ first_http_ctx = c; + nb_connections++; + + start_wait_request(c, is_rtsp); + + return; + + fail: + if (c) { + av_free(c->buffer); + av_free(c); + } + close(fd); +} + +static void close_connection(HTTPContext *c) +{ + HTTPContext **cp, *c1; + int i, nb_streams; + AVFormatContext *ctx; + URLContext *h; + AVStream *st; + + /* remove connection from list */ + cp = &first_http_ctx; + while ((*cp) != NULL) { + c1 = *cp; + if (c1 == c) { + *cp = c->next; + } else { + cp = &c1->next; + } + } + + /* remove references, if any (XXX: do it faster) */ + for(c1 = first_http_ctx; c1 != NULL; c1 = c1->next) { + if (c1->rtsp_c == c) + c1->rtsp_c = NULL; + } + + /* remove connection associated resources */ + if (c->fd >= 0) + close(c->fd); + if (c->fmt_in) { + /* close each frame parser */ + for(i=0;ifmt_in->nb_streams;i++) { + st = c->fmt_in->streams[i]; + if (st->codec->codec) { + avcodec_close(st->codec); + } + } + av_close_input_file(c->fmt_in); + } + + /* free RTP output streams if any */ + nb_streams = 0; + if (c->stream) + nb_streams = c->stream->nb_streams; + + for(i=0;irtp_ctx[i]; + if (ctx) { + av_write_trailer(ctx); + av_free(ctx); + } + h = c->rtp_handles[i]; + if (h) { + url_close(h); + } + } + + ctx = &c->fmt_ctx; + + if (!c->last_packet_sent) { + if (ctx->oformat) { + /* prepare header */ + if (url_open_dyn_buf(&ctx->pb) >= 0) { + av_write_trailer(ctx); + url_close_dyn_buf(&ctx->pb, &c->pb_buffer); + } + } + } + + for(i=0; inb_streams; i++) + av_free(ctx->streams[i]) ; + + if (c->stream && !c->post && c->stream->stream_type == STREAM_TYPE_LIVE) + current_bandwidth -= c->stream->bandwidth; + av_freep(&c->pb_buffer); + av_freep(&c->packet_buffer); + av_free(c->buffer); + av_free(c); + nb_connections--; +} + +static int handle_connection(HTTPContext *c) +{ + int len, ret; + + switch(c->state) { + case HTTPSTATE_WAIT_REQUEST: + case RTSPSTATE_WAIT_REQUEST: + /* timeout ? 
*/ + if ((c->timeout - cur_time) < 0) + return -1; + if (c->poll_entry->revents & (POLLERR | POLLHUP)) + return -1; + + /* no need to read if no events */ + if (!(c->poll_entry->revents & POLLIN)) + return 0; + /* read the data */ + read_loop: + len = read(c->fd, c->buffer_ptr, 1); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) + return -1; + } else if (len == 0) { + return -1; + } else { + /* search for end of request. */ + uint8_t *ptr; + c->buffer_ptr += len; + ptr = c->buffer_ptr; + if ((ptr >= c->buffer + 2 && !memcmp(ptr-2, "\n\n", 2)) || + (ptr >= c->buffer + 4 && !memcmp(ptr-4, "\r\n\r\n", 4))) { + /* request found : parse it and reply */ + if (c->state == HTTPSTATE_WAIT_REQUEST) { + ret = http_parse_request(c); + } else { + ret = rtsp_parse_request(c); + } + if (ret < 0) + return -1; + } else if (ptr >= c->buffer_end) { + /* request too long: cannot do anything */ + return -1; + } else goto read_loop; + } + break; + + case HTTPSTATE_SEND_HEADER: + if (c->poll_entry->revents & (POLLERR | POLLHUP)) + return -1; + + /* no need to write if no events */ + if (!(c->poll_entry->revents & POLLOUT)) + return 0; + len = write(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) { + /* error : close connection */ + av_freep(&c->pb_buffer); + return -1; + } + } else { + c->buffer_ptr += len; + if (c->stream) + c->stream->bytes_served += len; + c->data_count += len; + if (c->buffer_ptr >= c->buffer_end) { + av_freep(&c->pb_buffer); + /* if error, exit */ + if (c->http_error) { + return -1; + } + /* all the buffer was sent : synchronize to the incoming stream */ + c->state = HTTPSTATE_SEND_DATA_HEADER; + c->buffer_ptr = c->buffer_end = c->buffer; + } + } + break; + + case HTTPSTATE_SEND_DATA: + case HTTPSTATE_SEND_DATA_HEADER: + case HTTPSTATE_SEND_DATA_TRAILER: + /* for packetized output, we consider we can always write (the + input streams sets the speed). 
It may be better to verify + that we do not rely too much on the kernel queues */ + if (!c->is_packetized) { + if (c->poll_entry->revents & (POLLERR | POLLHUP)) + return -1; + + /* no need to read if no events */ + if (!(c->poll_entry->revents & POLLOUT)) + return 0; + } + if (http_send_data(c) < 0) + return -1; + break; + case HTTPSTATE_RECEIVE_DATA: + /* no need to read if no events */ + if (c->poll_entry->revents & (POLLERR | POLLHUP)) + return -1; + if (!(c->poll_entry->revents & POLLIN)) + return 0; + if (http_receive_data(c) < 0) + return -1; + break; + case HTTPSTATE_WAIT_FEED: + /* no need to read if no events */ + if (c->poll_entry->revents & (POLLIN | POLLERR | POLLHUP)) + return -1; + + /* nothing to do, we'll be waken up by incoming feed packets */ + break; + + case RTSPSTATE_SEND_REPLY: + if (c->poll_entry->revents & (POLLERR | POLLHUP)) { + av_freep(&c->pb_buffer); + return -1; + } + /* no need to write if no events */ + if (!(c->poll_entry->revents & POLLOUT)) + return 0; + len = write(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) { + /* error : close connection */ + av_freep(&c->pb_buffer); + return -1; + } + } else { + c->buffer_ptr += len; + c->data_count += len; + if (c->buffer_ptr >= c->buffer_end) { + /* all the buffer was sent : wait for a new request */ + av_freep(&c->pb_buffer); + start_wait_request(c, 1); + } + } + break; + case RTSPSTATE_SEND_PACKET: + if (c->poll_entry->revents & (POLLERR | POLLHUP)) { + av_freep(&c->packet_buffer); + return -1; + } + /* no need to write if no events */ + if (!(c->poll_entry->revents & POLLOUT)) + return 0; + len = write(c->fd, c->packet_buffer_ptr, + c->packet_buffer_end - c->packet_buffer_ptr); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) { + /* error : close connection */ + av_freep(&c->packet_buffer); + return -1; + } + } else { + c->packet_buffer_ptr += len; + if (c->packet_buffer_ptr >= c->packet_buffer_end) { + /* all 
the buffer was sent : wait for a new request */ + av_freep(&c->packet_buffer); + c->state = RTSPSTATE_WAIT_REQUEST; + } + } + break; + case HTTPSTATE_READY: + /* nothing to do */ + break; + default: + return -1; + } + return 0; +} + +static int extract_rates(char *rates, int ratelen, const char *request) +{ + const char *p; + + for (p = request; *p && *p != '\r' && *p != '\n'; ) { + if (strncasecmp(p, "Pragma:", 7) == 0) { + const char *q = p + 7; + + while (*q && *q != '\n' && isspace(*q)) + q++; + + if (strncasecmp(q, "stream-switch-entry=", 20) == 0) { + int stream_no; + int rate_no; + + q += 20; + + memset(rates, 0xff, ratelen); + + while (1) { + while (*q && *q != '\n' && *q != ':') + q++; + + if (sscanf(q, ":%d:%d", &stream_no, &rate_no) != 2) { + break; + } + stream_no--; + if (stream_no < ratelen && stream_no >= 0) { + rates[stream_no] = rate_no; + } + + while (*q && *q != '\n' && !isspace(*q)) + q++; + } + + return 1; + } + } + p = strchr(p, '\n'); + if (!p) + break; + + p++; + } + + return 0; +} + +static int find_stream_in_feed(FFStream *feed, AVCodecContext *codec, int bit_rate) +{ + int i; + int best_bitrate = 100000000; + int best = -1; + + for (i = 0; i < feed->nb_streams; i++) { + AVCodecContext *feed_codec = feed->streams[i]->codec; + + if (feed_codec->codec_id != codec->codec_id || + feed_codec->sample_rate != codec->sample_rate || + feed_codec->width != codec->width || + feed_codec->height != codec->height) { + continue; + } + + /* Potential stream */ + + /* We want the fastest stream less than bit_rate, or the slowest + * faster than bit_rate + */ + + if (feed_codec->bit_rate <= bit_rate) { + if (best_bitrate > bit_rate || feed_codec->bit_rate > best_bitrate) { + best_bitrate = feed_codec->bit_rate; + best = i; + } + } else { + if (feed_codec->bit_rate < best_bitrate) { + best_bitrate = feed_codec->bit_rate; + best = i; + } + } + } + + return best; +} + +static int modify_current_stream(HTTPContext *c, char *rates) +{ + int i; + FFStream *req = 
c->stream; + int action_required = 0; + + /* Not much we can do for a feed */ + if (!req->feed) + return 0; + + for (i = 0; i < req->nb_streams; i++) { + AVCodecContext *codec = req->streams[i]->codec; + + switch(rates[i]) { + case 0: + c->switch_feed_streams[i] = req->feed_streams[i]; + break; + case 1: + c->switch_feed_streams[i] = find_stream_in_feed(req->feed, codec, codec->bit_rate / 2); + break; + case 2: + /* Wants off or slow */ + c->switch_feed_streams[i] = find_stream_in_feed(req->feed, codec, codec->bit_rate / 4); +#ifdef WANTS_OFF + /* This doesn't work well when it turns off the only stream! */ + c->switch_feed_streams[i] = -2; + c->feed_streams[i] = -2; +#endif + break; + } + + if (c->switch_feed_streams[i] >= 0 && c->switch_feed_streams[i] != c->feed_streams[i]) + action_required = 1; + } + + return action_required; +} + + +static void do_switch_stream(HTTPContext *c, int i) +{ + if (c->switch_feed_streams[i] >= 0) { +#ifdef PHILIP + c->feed_streams[i] = c->switch_feed_streams[i]; +#endif + + /* Now update the stream */ + } + c->switch_feed_streams[i] = -1; +} + +/* XXX: factorize in utils.c ? */ +/* XXX: take care with different space meaning */ +static void skip_spaces(const char **pp) +{ + const char *p; + p = *pp; + while (*p == ' ' || *p == '\t') + p++; + *pp = p; +} + +static void get_word(char *buf, int buf_size, const char **pp) +{ + const char *p; + char *q; + + p = *pp; + skip_spaces(&p); + q = buf; + while (!isspace(*p) && *p != '\0') { + if ((q - buf) < buf_size - 1) + *q++ = *p; + p++; + } + if (buf_size > 0) + *q = '\0'; + *pp = p; +} + +static int validate_acl(FFStream *stream, HTTPContext *c) +{ + enum IPAddressAction last_action = IP_DENY; + IPAddressACL *acl; + struct in_addr *src = &c->from_addr.sin_addr; + unsigned long src_addr = ntohl(src->s_addr); + + for (acl = stream->acl; acl; acl = acl->next) { + if (src_addr >= acl->first.s_addr && src_addr <= acl->last.s_addr) { + return (acl->action == IP_ALLOW) ? 
1 : 0; + } + last_action = acl->action; + } + + /* Nothing matched, so return not the last action */ + return (last_action == IP_DENY) ? 1 : 0; +} + +/* compute the real filename of a file by matching it without its + extensions to all the stream filenames */ +static void compute_real_filename(char *filename, int max_size) +{ + char file1[1024]; + char file2[1024]; + char *p; + FFStream *stream; + + /* compute filename by matching without the file extensions */ + pstrcpy(file1, sizeof(file1), filename); + p = strrchr(file1, '.'); + if (p) + *p = '\0'; + for(stream = first_stream; stream != NULL; stream = stream->next) { + pstrcpy(file2, sizeof(file2), stream->filename); + p = strrchr(file2, '.'); + if (p) + *p = '\0'; + if (!strcmp(file1, file2)) { + pstrcpy(filename, max_size, stream->filename); + break; + } + } +} + +enum RedirType { + REDIR_NONE, + REDIR_ASX, + REDIR_RAM, + REDIR_ASF, + REDIR_RTSP, + REDIR_SDP, +}; + +/* parse http request and prepare header */ +static int http_parse_request(HTTPContext *c) +{ + char *p; + enum RedirType redir_type; + char cmd[32]; + char info[1024], *filename; + char url[1024], *q; + char protocol[32]; + char msg[1024]; + const char *mime_type; + FFStream *stream; + int i; + char ratebuf[32]; + char *useragent = 0; + + p = c->buffer; + get_word(cmd, sizeof(cmd), (const char **)&p); + pstrcpy(c->method, sizeof(c->method), cmd); + + if (!strcmp(cmd, "GET")) + c->post = 0; + else if (!strcmp(cmd, "POST")) + c->post = 1; + else + return -1; + + get_word(url, sizeof(url), (const char **)&p); + pstrcpy(c->url, sizeof(c->url), url); + + get_word(protocol, sizeof(protocol), (const char **)&p); + if (strcmp(protocol, "HTTP/1.0") && strcmp(protocol, "HTTP/1.1")) + return -1; + + pstrcpy(c->protocol, sizeof(c->protocol), protocol); + + if (ffserver_debug) + http_log("New connection: %s %s\n", cmd, url); + + /* find the filename and the optional info string in the request */ + p = url; + if (*p == '/') + p++; + filename = p; + p = 
strchr(p, '?'); + if (p) { + pstrcpy(info, sizeof(info), p); + *p = '\0'; + } else { + info[0] = '\0'; + } + + for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) { + if (strncasecmp(p, "User-Agent:", 11) == 0) { + useragent = p + 11; + if (*useragent && *useragent != '\n' && isspace(*useragent)) + useragent++; + break; + } + p = strchr(p, '\n'); + if (!p) + break; + + p++; + } + + redir_type = REDIR_NONE; + if (match_ext(filename, "asx")) { + redir_type = REDIR_ASX; + filename[strlen(filename)-1] = 'f'; + } else if (match_ext(filename, "asf") && + (!useragent || strncasecmp(useragent, "NSPlayer", 8) != 0)) { + /* if this isn't WMP or lookalike, return the redirector file */ + redir_type = REDIR_ASF; + } else if (match_ext(filename, "rpm,ram")) { + redir_type = REDIR_RAM; + strcpy(filename + strlen(filename)-2, "m"); + } else if (match_ext(filename, "rtsp")) { + redir_type = REDIR_RTSP; + compute_real_filename(filename, sizeof(url) - 1); + } else if (match_ext(filename, "sdp")) { + redir_type = REDIR_SDP; + compute_real_filename(filename, sizeof(url) - 1); + } + + stream = first_stream; + while (stream != NULL) { + if (!strcmp(stream->filename, filename) && validate_acl(stream, c)) + break; + stream = stream->next; + } + if (stream == NULL) { + snprintf(msg, sizeof(msg), "File '%s' not found", url); + goto send_error; + } + + c->stream = stream; + memcpy(c->feed_streams, stream->feed_streams, sizeof(c->feed_streams)); + memset(c->switch_feed_streams, -1, sizeof(c->switch_feed_streams)); + + if (stream->stream_type == STREAM_TYPE_REDIRECT) { + c->http_error = 301; + q = c->buffer; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 301 Moved\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Location: %s\r\n", stream->feed_filename); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: text/html\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) 
c->buffer + c->buffer_size, "Moved\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "You should be redirected.\r\n", stream->feed_filename); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + + /* prepare output buffer */ + c->buffer_ptr = c->buffer; + c->buffer_end = q; + c->state = HTTPSTATE_SEND_HEADER; + return 0; + } + + /* If this is WMP, get the rate information */ + if (extract_rates(ratebuf, sizeof(ratebuf), c->buffer)) { + if (modify_current_stream(c, ratebuf)) { + for (i = 0; i < sizeof(c->feed_streams) / sizeof(c->feed_streams[0]); i++) { + if (c->switch_feed_streams[i] >= 0) + do_switch_stream(c, i); + } + } + } + + if (c->post == 0 && stream->stream_type == STREAM_TYPE_LIVE) { + current_bandwidth += stream->bandwidth; + } + + if (c->post == 0 && max_bandwidth < current_bandwidth) { + c->http_error = 200; + q = c->buffer; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 Server too busy\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: text/html\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Too busy\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "

The server is too busy to serve your request at this time.

\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "

The bandwidth being served (including your stream) is %dkbit/sec, and this exceeds the limit of %dkbit/sec.

\r\n", + current_bandwidth, max_bandwidth); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + + /* prepare output buffer */ + c->buffer_ptr = c->buffer; + c->buffer_end = q; + c->state = HTTPSTATE_SEND_HEADER; + return 0; + } + + if (redir_type != REDIR_NONE) { + char *hostinfo = 0; + + for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) { + if (strncasecmp(p, "Host:", 5) == 0) { + hostinfo = p + 5; + break; + } + p = strchr(p, '\n'); + if (!p) + break; + + p++; + } + + if (hostinfo) { + char *eoh; + char hostbuf[260]; + + while (isspace(*hostinfo)) + hostinfo++; + + eoh = strchr(hostinfo, '\n'); + if (eoh) { + if (eoh[-1] == '\r') + eoh--; + + if (eoh - hostinfo < sizeof(hostbuf) - 1) { + memcpy(hostbuf, hostinfo, eoh - hostinfo); + hostbuf[eoh - hostinfo] = 0; + + c->http_error = 200; + q = c->buffer; + switch(redir_type) { + case REDIR_ASX: + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 ASX Follows\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: video/x-ms-asf\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + //q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n", + hostbuf, filename, info); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + break; + case REDIR_RAM: + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 RAM Follows\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: audio/x-pn-realaudio\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "# Autogenerated by ffserver\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "http://%s/%s%s\r\n", + hostbuf, filename, info); + break; + case REDIR_ASF: + q += snprintf(q, q - (char *) 
c->buffer + c->buffer_size, "HTTP/1.0 200 ASF Redirect follows\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: video/x-ms-asf\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "[Reference]\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Ref1=http://%s/%s%s\r\n", + hostbuf, filename, info); + break; + case REDIR_RTSP: + { + char hostname[256], *p; + /* extract only hostname */ + pstrcpy(hostname, sizeof(hostname), hostbuf); + p = strrchr(hostname, ':'); + if (p) + *p = '\0'; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 RTSP Redirect follows\r\n"); + /* XXX: incorrect mime type ? */ + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: application/x-rtsp\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "rtsp://%s:%d/%s\r\n", + hostname, ntohs(my_rtsp_addr.sin_port), + filename); + } + break; + case REDIR_SDP: + { + uint8_t *sdp_data; + int sdp_data_size, len; + struct sockaddr_in my_addr; + + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 OK\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: application/sdp\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + + len = sizeof(my_addr); + getsockname(c->fd, (struct sockaddr *)&my_addr, &len); + + /* XXX: should use a dynamic buffer */ + sdp_data_size = prepare_sdp_description(stream, + &sdp_data, + my_addr.sin_addr); + if (sdp_data_size > 0) { + memcpy(q, sdp_data, sdp_data_size); + q += sdp_data_size; + *q = '\0'; + av_free(sdp_data); + } + } + break; + default: + av_abort(); + break; + } + + /* prepare output buffer */ + c->buffer_ptr = c->buffer; + c->buffer_end = q; + c->state = HTTPSTATE_SEND_HEADER; + return 0; + } + } + } + + snprintf(msg, sizeof(msg), "ASX/RAM 
file not handled"); + goto send_error; + } + + stream->conns_served++; + + /* XXX: add there authenticate and IP match */ + + if (c->post) { + /* if post, it means a feed is being sent */ + if (!stream->is_feed) { + /* However it might be a status report from WMP! Lets log the data + * as it might come in handy one day + */ + char *logline = 0; + int client_id = 0; + + for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) { + if (strncasecmp(p, "Pragma: log-line=", 17) == 0) { + logline = p; + break; + } + if (strncasecmp(p, "Pragma: client-id=", 18) == 0) { + client_id = strtol(p + 18, 0, 10); + } + p = strchr(p, '\n'); + if (!p) + break; + + p++; + } + + if (logline) { + char *eol = strchr(logline, '\n'); + + logline += 17; + + if (eol) { + if (eol[-1] == '\r') + eol--; + http_log("%.*s\n", (int) (eol - logline), logline); + c->suppress_log = 1; + } + } + +#ifdef DEBUG_WMP + http_log("\nGot request:\n%s\n", c->buffer); +#endif + + if (client_id && extract_rates(ratebuf, sizeof(ratebuf), c->buffer)) { + HTTPContext *wmpc; + + /* Now we have to find the client_id */ + for (wmpc = first_http_ctx; wmpc; wmpc = wmpc->next) { + if (wmpc->wmp_client_id == client_id) + break; + } + + if (wmpc) { + if (modify_current_stream(wmpc, ratebuf)) { + wmpc->switch_pending = 1; + } + } + } + + snprintf(msg, sizeof(msg), "POST command not handled"); + c->stream = 0; + goto send_error; + } + if (http_start_receive_data(c) < 0) { + snprintf(msg, sizeof(msg), "could not open feed"); + goto send_error; + } + c->http_error = 0; + c->state = HTTPSTATE_RECEIVE_DATA; + return 0; + } + +#ifdef DEBUG_WMP + if (strcmp(stream->filename + strlen(stream->filename) - 4, ".asf") == 0) { + http_log("\nGot request:\n%s\n", c->buffer); + } +#endif + + if (c->stream->stream_type == STREAM_TYPE_STATUS) + goto send_stats; + + /* open input stream */ + if (open_input_stream(c, info) < 0) { + snprintf(msg, sizeof(msg), "Input stream corresponding to '%s' not found", url); + goto send_error; + } + + /* 
prepare http header */ + q = c->buffer; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 200 OK\r\n"); + mime_type = c->stream->fmt->mime_type; + if (!mime_type) + mime_type = "application/x-octet_stream"; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Pragma: no-cache\r\n"); + + /* for asf, we need extra headers */ + if (!strcmp(c->stream->fmt->name,"asf_stream")) { + /* Need to allocate a client id */ + + c->wmp_client_id = random() & 0x7fffffff; + + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Server: Cougar 4.1.0.3923\r\nCache-Control: no-cache\r\nPragma: client-id=%d\r\nPragma: features=\"broadcast\"\r\n", c->wmp_client_id); + } + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-Type: %s\r\n", mime_type); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + + /* prepare output buffer */ + c->http_error = 0; + c->buffer_ptr = c->buffer; + c->buffer_end = q; + c->state = HTTPSTATE_SEND_HEADER; + return 0; + send_error: + c->http_error = 404; + q = c->buffer; + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "HTTP/1.0 404 Not Found\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "Content-type: %s\r\n", "text/html"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\r\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "404 Not Found\n"); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "%s\n", msg); + q += snprintf(q, q - (char *) c->buffer + c->buffer_size, "\n"); + + /* prepare output buffer */ + c->buffer_ptr = c->buffer; + c->buffer_end = q; + c->state = HTTPSTATE_SEND_HEADER; + return 0; + send_stats: + compute_stats(c); + c->http_error = 200; /* horrible : we use this value to avoid + going to the send data state */ + c->state = HTTPSTATE_SEND_HEADER; + return 0; +} + +static void fmt_bytecount(ByteIOContext *pb, int64_t count) +{ + static 
const char *suffix = " kMGTP"; + const char *s; + + for (s = suffix; count >= 100000 && s[1]; count /= 1000, s++) { + } + + url_fprintf(pb, "%"PRId64"%c", count, *s); +} + +static void compute_stats(HTTPContext *c) +{ + HTTPContext *c1; + FFStream *stream; + char *p; + time_t ti; + int i, len; + ByteIOContext pb1, *pb = &pb1; + + if (url_open_dyn_buf(pb) < 0) { + /* XXX: return an error ? */ + c->buffer_ptr = c->buffer; + c->buffer_end = c->buffer; + return; + } + + url_fprintf(pb, "HTTP/1.0 200 OK\r\n"); + url_fprintf(pb, "Content-type: %s\r\n", "text/html"); + url_fprintf(pb, "Pragma: no-cache\r\n"); + url_fprintf(pb, "\r\n"); + + url_fprintf(pb, "FFServer Status\n"); + if (c->stream->feed_filename) { + url_fprintf(pb, "\n", c->stream->feed_filename); + } + url_fprintf(pb, "\n"); + url_fprintf(pb, "

FFServer Status

\n"); + /* format status */ + url_fprintf(pb, "

Available Streams

\n"); + url_fprintf(pb, "\n"); + url_fprintf(pb, "
PathServed
Conns

bytes
FormatBit rate
kbits/s
Video
kbits/s

Codec
Audio
kbits/s

Codec
Feed\n"); + stream = first_stream; + while (stream != NULL) { + char sfilename[1024]; + char *eosf; + + if (stream->feed != stream) { + pstrcpy(sfilename, sizeof(sfilename) - 10, stream->filename); + eosf = sfilename + strlen(sfilename); + if (eosf - sfilename >= 4) { + if (strcmp(eosf - 4, ".asf") == 0) { + strcpy(eosf - 4, ".asx"); + } else if (strcmp(eosf - 3, ".rm") == 0) { + strcpy(eosf - 3, ".ram"); + } else if (stream->fmt == &rtp_mux) { + /* generate a sample RTSP director if + unicast. Generate an SDP redirector if + multicast */ + eosf = strrchr(sfilename, '.'); + if (!eosf) + eosf = sfilename + strlen(sfilename); + if (stream->is_multicast) + strcpy(eosf, ".sdp"); + else + strcpy(eosf, ".rtsp"); + } + } + + url_fprintf(pb, "
%s ", + sfilename, stream->filename); + url_fprintf(pb, " %d ", + stream->conns_served); + fmt_bytecount(pb, stream->bytes_served); + switch(stream->stream_type) { + case STREAM_TYPE_LIVE: + { + int audio_bit_rate = 0; + int video_bit_rate = 0; + const char *audio_codec_name = ""; + const char *video_codec_name = ""; + const char *audio_codec_name_extra = ""; + const char *video_codec_name_extra = ""; + + for(i=0;inb_streams;i++) { + AVStream *st = stream->streams[i]; + AVCodec *codec = avcodec_find_encoder(st->codec->codec_id); + switch(st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + audio_bit_rate += st->codec->bit_rate; + if (codec) { + if (*audio_codec_name) + audio_codec_name_extra = "..."; + audio_codec_name = codec->name; + } + break; + case CODEC_TYPE_VIDEO: + video_bit_rate += st->codec->bit_rate; + if (codec) { + if (*video_codec_name) + video_codec_name_extra = "..."; + video_codec_name = codec->name; + } + break; + case CODEC_TYPE_DATA: + video_bit_rate += st->codec->bit_rate; + break; + default: + av_abort(); + } + } + url_fprintf(pb, " %s %d %d %s %s %d %s %s", + stream->fmt->name, + stream->bandwidth, + video_bit_rate / 1000, video_codec_name, video_codec_name_extra, + audio_bit_rate / 1000, audio_codec_name, audio_codec_name_extra); + if (stream->feed) { + url_fprintf(pb, "%s", stream->feed->filename); + } else { + url_fprintf(pb, "%s", stream->feed_filename); + } + url_fprintf(pb, "\n"); + } + break; + default: + url_fprintf(pb, " - - - - \n"); + break; + } + } + stream = stream->next; + } + url_fprintf(pb, "
\n"); + + stream = first_stream; + while (stream != NULL) { + if (stream->feed == stream) { + url_fprintf(pb, "

Feed %s

", stream->filename); + if (stream->pid) { + url_fprintf(pb, "Running as pid %d.\n", stream->pid); + +#if defined(linux) && !defined(CONFIG_NOCUTILS) + { + FILE *pid_stat; + char ps_cmd[64]; + + /* This is somewhat linux specific I guess */ + snprintf(ps_cmd, sizeof(ps_cmd), + "ps -o \"%%cpu,cputime\" --no-headers %d", + stream->pid); + + pid_stat = popen(ps_cmd, "r"); + if (pid_stat) { + char cpuperc[10]; + char cpuused[64]; + + if (fscanf(pid_stat, "%10s %64s", cpuperc, + cpuused) == 2) { + url_fprintf(pb, "Currently using %s%% of the cpu. Total time used %s.\n", + cpuperc, cpuused); + } + fclose(pid_stat); + } + } +#endif + + url_fprintf(pb, "

"); + } + url_fprintf(pb, "
Streamtypekbits/scodecParameters\n"); + + for (i = 0; i < stream->nb_streams; i++) { + AVStream *st = stream->streams[i]; + AVCodec *codec = avcodec_find_encoder(st->codec->codec_id); + const char *type = "unknown"; + char parameters[64]; + + parameters[0] = 0; + + switch(st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + type = "audio"; + break; + case CODEC_TYPE_VIDEO: + type = "video"; + snprintf(parameters, sizeof(parameters), "%dx%d, q=%d-%d, fps=%d", st->codec->width, st->codec->height, + st->codec->qmin, st->codec->qmax, st->codec->time_base.den / st->codec->time_base.num); + break; + default: + av_abort(); + } + url_fprintf(pb, "
%d%s%d%s%s\n", + i, type, st->codec->bit_rate/1000, codec ? codec->name : "", parameters); + } + url_fprintf(pb, "
\n"); + + } + stream = stream->next; + } + +#if 0 + { + float avg; + AVCodecContext *enc; + char buf[1024]; + + /* feed status */ + stream = first_feed; + while (stream != NULL) { + url_fprintf(pb, "

Feed '%s'

\n", stream->filename); + url_fprintf(pb, "\n"); + url_fprintf(pb, "
ParametersFrame countSizeAvg bitrate (kbits/s)\n"); + for(i=0;inb_streams;i++) { + AVStream *st = stream->streams[i]; + FeedData *fdata = st->priv_data; + enc = st->codec; + + avcodec_string(buf, sizeof(buf), enc); + avg = fdata->avg_frame_size * (float)enc->rate * 8.0; + if (enc->codec->type == CODEC_TYPE_AUDIO && enc->frame_size > 0) + avg /= enc->frame_size; + url_fprintf(pb, "
%s %d %Ld %0.1f\n", + buf, enc->frame_number, fdata->data_count, avg / 1000.0); + } + url_fprintf(pb, "
\n"); + stream = stream->next_feed; + } + } +#endif + + /* connection status */ + url_fprintf(pb, "

Connection Status

\n"); + + url_fprintf(pb, "Number of connections: %d / %d
\n", + nb_connections, nb_max_connections); + + url_fprintf(pb, "Bandwidth in use: %dk / %dk
\n", + current_bandwidth, max_bandwidth); + + url_fprintf(pb, "\n"); + url_fprintf(pb, "
#FileIPProtoStateTarget bits/secActual bits/secBytes transferred\n"); + c1 = first_http_ctx; + i = 0; + while (c1 != NULL) { + int bitrate; + int j; + + bitrate = 0; + if (c1->stream) { + for (j = 0; j < c1->stream->nb_streams; j++) { + if (!c1->stream->feed) { + bitrate += c1->stream->streams[j]->codec->bit_rate; + } else { + if (c1->feed_streams[j] >= 0) { + bitrate += c1->stream->feed->streams[c1->feed_streams[j]]->codec->bit_rate; + } + } + } + } + + i++; + p = inet_ntoa(c1->from_addr.sin_addr); + url_fprintf(pb, "
%d%s%s%s%s%s", + i, + c1->stream ? c1->stream->filename : "", + c1->state == HTTPSTATE_RECEIVE_DATA ? "(input)" : "", + p, + c1->protocol, + http_state[c1->state]); + fmt_bytecount(pb, bitrate); + url_fprintf(pb, ""); + fmt_bytecount(pb, compute_datarate(&c1->datarate, c1->data_count) * 8); + url_fprintf(pb, ""); + fmt_bytecount(pb, c1->data_count); + url_fprintf(pb, "\n"); + c1 = c1->next; + } + url_fprintf(pb, "
\n"); + + /* date */ + ti = time(NULL); + p = ctime(&ti); + url_fprintf(pb, "
Generated at %s", p); + url_fprintf(pb, "\n\n"); + + len = url_close_dyn_buf(pb, &c->pb_buffer); + c->buffer_ptr = c->pb_buffer; + c->buffer_end = c->pb_buffer + len; +} + +/* check if the parser needs to be opened for stream i */ +static void open_parser(AVFormatContext *s, int i) +{ + AVStream *st = s->streams[i]; + AVCodec *codec; + + if (!st->codec->codec) { + codec = avcodec_find_decoder(st->codec->codec_id); + if (codec && (codec->capabilities & CODEC_CAP_PARSE_ONLY)) { + st->codec->parse_only = 1; + if (avcodec_open(st->codec, codec) < 0) { + st->codec->parse_only = 0; + } + } + } +} + +static int open_input_stream(HTTPContext *c, const char *info) +{ + char buf[128]; + char input_filename[1024]; + AVFormatContext *s; + int buf_size, i; + int64_t stream_pos; + + /* find file name */ + if (c->stream->feed) { + strcpy(input_filename, c->stream->feed->feed_filename); + buf_size = FFM_PACKET_SIZE; + /* compute position (absolute time) */ + if (find_info_tag(buf, sizeof(buf), "date", info)) { + stream_pos = parse_date(buf, 0); + } else if (find_info_tag(buf, sizeof(buf), "buffer", info)) { + int prebuffer = strtol(buf, 0, 10); + stream_pos = av_gettime() - prebuffer * (int64_t)1000000; + } else { + stream_pos = av_gettime() - c->stream->prebuffer * (int64_t)1000; + } + } else { + strcpy(input_filename, c->stream->feed_filename); + buf_size = 0; + /* compute position (relative time) */ + if (find_info_tag(buf, sizeof(buf), "date", info)) { + stream_pos = parse_date(buf, 1); + } else { + stream_pos = 0; + } + } + if (input_filename[0] == '\0') + return -1; + +#if 0 + { time_t when = stream_pos / 1000000; + http_log("Stream pos = %lld, time=%s", stream_pos, ctime(&when)); + } +#endif + + /* open stream */ + if (av_open_input_file(&s, input_filename, c->stream->ifmt, + buf_size, c->stream->ap_in) < 0) { + http_log("%s not found", input_filename); + return -1; + } + c->fmt_in = s; + + /* open each parser */ + for(i=0;inb_streams;i++) + open_parser(s, i); + + /* choose 
stream as clock source (we favorize video stream if + present) for packet sending */ + c->pts_stream_index = 0; + for(i=0;istream->nb_streams;i++) { + if (c->pts_stream_index == 0 && + c->stream->streams[i]->codec->codec_type == CODEC_TYPE_VIDEO) { + c->pts_stream_index = i; + } + } + +#if 1 + if (c->fmt_in->iformat->read_seek) { + c->fmt_in->iformat->read_seek(c->fmt_in, 0, stream_pos, 0); + } +#endif + /* set the start time (needed for maxtime and RTP packet timing) */ + c->start_time = cur_time; + c->first_pts = AV_NOPTS_VALUE; + return 0; +} + +/* return the server clock (in us) */ +static int64_t get_server_clock(HTTPContext *c) +{ + /* compute current pts value from system time */ + return (int64_t)(cur_time - c->start_time) * 1000LL; +} + +/* return the estimated time at which the current packet must be sent + (in us) */ +static int64_t get_packet_send_clock(HTTPContext *c) +{ + int bytes_left, bytes_sent, frame_bytes; + + frame_bytes = c->cur_frame_bytes; + if (frame_bytes <= 0) { + return c->cur_pts; + } else { + bytes_left = c->buffer_end - c->buffer_ptr; + bytes_sent = frame_bytes - bytes_left; + return c->cur_pts + (c->cur_frame_duration * bytes_sent) / frame_bytes; + } +} + + +static int http_prepare_data(HTTPContext *c) +{ + int i, len, ret; + AVFormatContext *ctx; + + av_freep(&c->pb_buffer); + switch(c->state) { + case HTTPSTATE_SEND_DATA_HEADER: + memset(&c->fmt_ctx, 0, sizeof(c->fmt_ctx)); + pstrcpy(c->fmt_ctx.author, sizeof(c->fmt_ctx.author), + c->stream->author); + pstrcpy(c->fmt_ctx.comment, sizeof(c->fmt_ctx.comment), + c->stream->comment); + pstrcpy(c->fmt_ctx.copyright, sizeof(c->fmt_ctx.copyright), + c->stream->copyright); + pstrcpy(c->fmt_ctx.title, sizeof(c->fmt_ctx.title), + c->stream->title); + + /* open output stream by using specified codecs */ + c->fmt_ctx.oformat = c->stream->fmt; + c->fmt_ctx.nb_streams = c->stream->nb_streams; + for(i=0;ifmt_ctx.nb_streams;i++) { + AVStream *st; + AVStream *src; + st = 
av_mallocz(sizeof(AVStream)); + st->codec= avcodec_alloc_context(); + c->fmt_ctx.streams[i] = st; + /* if file or feed, then just take streams from FFStream struct */ + if (!c->stream->feed || + c->stream->feed == c->stream) + src = c->stream->streams[i]; + else + src = c->stream->feed->streams[c->stream->feed_streams[i]]; + + *st = *src; + st->priv_data = 0; + st->codec->frame_number = 0; /* XXX: should be done in + AVStream, not in codec */ + /* I'm pretty sure that this is not correct... + * However, without it, we crash + */ + st->codec->coded_frame = &dummy_frame; + } + c->got_key_frame = 0; + + /* prepare header and save header data in a stream */ + if (url_open_dyn_buf(&c->fmt_ctx.pb) < 0) { + /* XXX: potential leak */ + return -1; + } + c->fmt_ctx.pb.is_streamed = 1; + + av_set_parameters(&c->fmt_ctx, NULL); + av_write_header(&c->fmt_ctx); + + len = url_close_dyn_buf(&c->fmt_ctx.pb, &c->pb_buffer); + c->buffer_ptr = c->pb_buffer; + c->buffer_end = c->pb_buffer + len; + + c->state = HTTPSTATE_SEND_DATA; + c->last_packet_sent = 0; + break; + case HTTPSTATE_SEND_DATA: + /* find a new packet */ + { + AVPacket pkt; + + /* read a packet from the input stream */ + if (c->stream->feed) { + ffm_set_write_index(c->fmt_in, + c->stream->feed->feed_write_index, + c->stream->feed->feed_size); + } + + if (c->stream->max_time && + c->stream->max_time + c->start_time - cur_time < 0) { + /* We have timed out */ + c->state = HTTPSTATE_SEND_DATA_TRAILER; + } else { + redo: + if (av_read_frame(c->fmt_in, &pkt) < 0) { + if (c->stream->feed && c->stream->feed->feed_opened) { + /* if coming from feed, it means we reached the end of the + ffm file, so must wait for more data */ + c->state = HTTPSTATE_WAIT_FEED; + return 1; /* state changed */ + } else { + if (c->stream->loop) { + av_close_input_file(c->fmt_in); + c->fmt_in = NULL; + if (open_input_stream(c, "") < 0) + goto no_loop; + goto redo; + } else { + no_loop: + /* must send trailer now because eof or error */ + c->state = 
HTTPSTATE_SEND_DATA_TRAILER; + } + } + } else { + /* update first pts if needed */ + if (c->first_pts == AV_NOPTS_VALUE) { + c->first_pts = av_rescale_q(pkt.dts, c->fmt_in->streams[pkt.stream_index]->time_base, AV_TIME_BASE_Q); + c->start_time = cur_time; + } + /* send it to the appropriate stream */ + if (c->stream->feed) { + /* if coming from a feed, select the right stream */ + if (c->switch_pending) { + c->switch_pending = 0; + for(i=0;istream->nb_streams;i++) { + if (c->switch_feed_streams[i] == pkt.stream_index) { + if (pkt.flags & PKT_FLAG_KEY) { + do_switch_stream(c, i); + } + } + if (c->switch_feed_streams[i] >= 0) { + c->switch_pending = 1; + } + } + } + for(i=0;istream->nb_streams;i++) { + if (c->feed_streams[i] == pkt.stream_index) { + pkt.stream_index = i; + if (pkt.flags & PKT_FLAG_KEY) { + c->got_key_frame |= 1 << i; + } + /* See if we have all the key frames, then + * we start to send. This logic is not quite + * right, but it works for the case of a + * single video stream with one or more + * audio streams (for which every frame is + * typically a key frame). + */ + if (!c->stream->send_on_key || + ((c->got_key_frame + 1) >> c->stream->nb_streams)) { + goto send_it; + } + } + } + } else { + AVCodecContext *codec; + + send_it: + /* specific handling for RTP: we use several + output stream (one for each RTP + connection). 
XXX: need more abstract handling */ + if (c->is_packetized) { + AVStream *st; + /* compute send time and duration */ + st = c->fmt_in->streams[pkt.stream_index]; + c->cur_pts = av_rescale_q(pkt.dts, st->time_base, AV_TIME_BASE_Q); + if (st->start_time != AV_NOPTS_VALUE) + c->cur_pts -= av_rescale_q(st->start_time, st->time_base, AV_TIME_BASE_Q); + c->cur_frame_duration = av_rescale_q(pkt.duration, st->time_base, AV_TIME_BASE_Q); +#if 0 + printf("index=%d pts=%0.3f duration=%0.6f\n", + pkt.stream_index, + (double)c->cur_pts / + AV_TIME_BASE, + (double)c->cur_frame_duration / + AV_TIME_BASE); +#endif + /* find RTP context */ + c->packet_stream_index = pkt.stream_index; + ctx = c->rtp_ctx[c->packet_stream_index]; + if(!ctx) { + av_free_packet(&pkt); + break; + } + codec = ctx->streams[0]->codec; + /* only one stream per RTP connection */ + pkt.stream_index = 0; + } else { + ctx = &c->fmt_ctx; + /* Fudge here */ + codec = ctx->streams[pkt.stream_index]->codec; + } + + codec->coded_frame->key_frame = ((pkt.flags & PKT_FLAG_KEY) != 0); + if (c->is_packetized) { + int max_packet_size; + if (c->rtp_protocol == RTSP_PROTOCOL_RTP_TCP) + max_packet_size = RTSP_TCP_MAX_PACKET_SIZE; + else + max_packet_size = url_get_max_packet_size(c->rtp_handles[c->packet_stream_index]); + ret = url_open_dyn_packet_buf(&ctx->pb, max_packet_size); + } else { + ret = url_open_dyn_buf(&ctx->pb); + } + if (ret < 0) { + /* XXX: potential leak */ + return -1; + } + if (av_write_frame(ctx, &pkt)) { + c->state = HTTPSTATE_SEND_DATA_TRAILER; + } + + len = url_close_dyn_buf(&ctx->pb, &c->pb_buffer); + c->cur_frame_bytes = len; + c->buffer_ptr = c->pb_buffer; + c->buffer_end = c->pb_buffer + len; + + codec->frame_number++; + if (len == 0) + goto redo; + } + av_free_packet(&pkt); + } + } + } + break; + default: + case HTTPSTATE_SEND_DATA_TRAILER: + /* last packet test ? 
*/ + if (c->last_packet_sent || c->is_packetized) + return -1; + ctx = &c->fmt_ctx; + /* prepare header */ + if (url_open_dyn_buf(&ctx->pb) < 0) { + /* XXX: potential leak */ + return -1; + } + av_write_trailer(ctx); + len = url_close_dyn_buf(&ctx->pb, &c->pb_buffer); + c->buffer_ptr = c->pb_buffer; + c->buffer_end = c->pb_buffer + len; + + c->last_packet_sent = 1; + break; + } + return 0; +} + +/* in bit/s */ +#define SHORT_TERM_BANDWIDTH 8000000 + +/* should convert the format at the same time */ +/* send data starting at c->buffer_ptr to the output connection + (either UDP or TCP connection) */ +static int http_send_data(HTTPContext *c) +{ + int len, ret; + + for(;;) { + if (c->buffer_ptr >= c->buffer_end) { + ret = http_prepare_data(c); + if (ret < 0) + return -1; + else if (ret != 0) { + /* state change requested */ + break; + } + } else { + if (c->is_packetized) { + /* RTP data output */ + len = c->buffer_end - c->buffer_ptr; + if (len < 4) { + /* fail safe - should never happen */ + fail1: + c->buffer_ptr = c->buffer_end; + return 0; + } + len = (c->buffer_ptr[0] << 24) | + (c->buffer_ptr[1] << 16) | + (c->buffer_ptr[2] << 8) | + (c->buffer_ptr[3]); + if (len > (c->buffer_end - c->buffer_ptr)) + goto fail1; + if ((get_packet_send_clock(c) - get_server_clock(c)) > 0) { + /* nothing to send yet: we can wait */ + return 0; + } + + c->data_count += len; + update_datarate(&c->datarate, c->data_count); + if (c->stream) + c->stream->bytes_served += len; + + if (c->rtp_protocol == RTSP_PROTOCOL_RTP_TCP) { + /* RTP packets are sent inside the RTSP TCP connection */ + ByteIOContext pb1, *pb = &pb1; + int interleaved_index, size; + uint8_t header[4]; + HTTPContext *rtsp_c; + + rtsp_c = c->rtsp_c; + /* if no RTSP connection left, error */ + if (!rtsp_c) + return -1; + /* if already sending something, then wait. 
*/ + if (rtsp_c->state != RTSPSTATE_WAIT_REQUEST) { + break; + } + if (url_open_dyn_buf(pb) < 0) + goto fail1; + interleaved_index = c->packet_stream_index * 2; + /* RTCP packets are sent at odd indexes */ + if (c->buffer_ptr[1] == 200) + interleaved_index++; + /* write RTSP TCP header */ + header[0] = '$'; + header[1] = interleaved_index; + header[2] = len >> 8; + header[3] = len; + put_buffer(pb, header, 4); + /* write RTP packet data */ + c->buffer_ptr += 4; + put_buffer(pb, c->buffer_ptr, len); + size = url_close_dyn_buf(pb, &c->packet_buffer); + /* prepare asynchronous TCP sending */ + rtsp_c->packet_buffer_ptr = c->packet_buffer; + rtsp_c->packet_buffer_end = c->packet_buffer + size; + c->buffer_ptr += len; + + /* send everything we can NOW */ + len = write(rtsp_c->fd, rtsp_c->packet_buffer_ptr, + rtsp_c->packet_buffer_end - rtsp_c->packet_buffer_ptr); + if (len > 0) { + rtsp_c->packet_buffer_ptr += len; + } + if (rtsp_c->packet_buffer_ptr < rtsp_c->packet_buffer_end) { + /* if we could not send all the data, we will + send it later, so a new state is needed to + "lock" the RTSP TCP connection */ + rtsp_c->state = RTSPSTATE_SEND_PACKET; + break; + } else { + /* all data has been sent */ + av_freep(&c->packet_buffer); + } + } else { + /* send RTP packet directly in UDP */ + c->buffer_ptr += 4; + url_write(c->rtp_handles[c->packet_stream_index], + c->buffer_ptr, len); + c->buffer_ptr += len; + /* here we continue as we can send several packets per 10 ms slot */ + } + } else { + /* TCP data output */ + len = write(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) { + /* error : close connection */ + return -1; + } else { + return 0; + } + } else { + c->buffer_ptr += len; + } + c->data_count += len; + update_datarate(&c->datarate, c->data_count); + if (c->stream) + c->stream->bytes_served += len; + break; + } + } + } /* for(;;) */ + return 0; +} + +static int http_start_receive_data(HTTPContext *c) +{ 
+ int fd; + + if (c->stream->feed_opened) + return -1; + + /* Don't permit writing to this one */ + if (c->stream->readonly) + return -1; + + /* open feed */ + fd = open(c->stream->feed_filename, O_RDWR); + if (fd < 0) + return -1; + c->feed_fd = fd; + + c->stream->feed_write_index = ffm_read_write_index(fd); + c->stream->feed_size = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + + /* init buffer input */ + c->buffer_ptr = c->buffer; + c->buffer_end = c->buffer + FFM_PACKET_SIZE; + c->stream->feed_opened = 1; + return 0; +} + +static int http_receive_data(HTTPContext *c) +{ + HTTPContext *c1; + + if (c->buffer_end > c->buffer_ptr) { + int len; + + len = read(c->fd, c->buffer_ptr, c->buffer_end - c->buffer_ptr); + if (len < 0) { + if (errno != EAGAIN && errno != EINTR) { + /* error : close connection */ + goto fail; + } + } else if (len == 0) { + /* end of connection : close it */ + goto fail; + } else { + c->buffer_ptr += len; + c->data_count += len; + update_datarate(&c->datarate, c->data_count); + } + } + + if (c->buffer_ptr - c->buffer >= 2 && c->data_count > FFM_PACKET_SIZE) { + if (c->buffer[0] != 'f' || + c->buffer[1] != 'm') { + http_log("Feed stream has become desynchronized -- disconnecting\n"); + goto fail; + } + } + + if (c->buffer_ptr >= c->buffer_end) { + FFStream *feed = c->stream; + /* a packet has been received : write it in the store, except + if header */ + if (c->data_count > FFM_PACKET_SIZE) { + + // printf("writing pos=0x%Lx size=0x%Lx\n", feed->feed_write_index, feed->feed_size); + /* XXX: use llseek or url_seek */ + lseek(c->feed_fd, feed->feed_write_index, SEEK_SET); + write(c->feed_fd, c->buffer, FFM_PACKET_SIZE); + + feed->feed_write_index += FFM_PACKET_SIZE; + /* update file size */ + if (feed->feed_write_index > c->stream->feed_size) + feed->feed_size = feed->feed_write_index; + + /* handle wrap around if max file size reached */ + if (c->stream->feed_max_size && feed->feed_write_index >= c->stream->feed_max_size) + 
feed->feed_write_index = FFM_PACKET_SIZE; + + /* write index */ + ffm_write_write_index(c->feed_fd, feed->feed_write_index); + + /* wake up any waiting connections */ + for(c1 = first_http_ctx; c1 != NULL; c1 = c1->next) { + if (c1->state == HTTPSTATE_WAIT_FEED && + c1->stream->feed == c->stream->feed) { + c1->state = HTTPSTATE_SEND_DATA; + } + } + } else { + /* We have a header in our hands that contains useful data */ + AVFormatContext s; + AVInputFormat *fmt_in; + ByteIOContext *pb = &s.pb; + int i; + + memset(&s, 0, sizeof(s)); + + url_open_buf(pb, c->buffer, c->buffer_end - c->buffer, URL_RDONLY); + pb->buf_end = c->buffer_end; /* ?? */ + pb->is_streamed = 1; + + /* use feed output format name to find corresponding input format */ + fmt_in = av_find_input_format(feed->fmt->name); + if (!fmt_in) + goto fail; + + if (fmt_in->priv_data_size > 0) { + s.priv_data = av_mallocz(fmt_in->priv_data_size); + if (!s.priv_data) + goto fail; + } else + s.priv_data = NULL; + + if (fmt_in->read_header(&s, 0) < 0) { + av_freep(&s.priv_data); + goto fail; + } + + /* Now we have the actual streams */ + if (s.nb_streams != feed->nb_streams) { + av_freep(&s.priv_data); + goto fail; + } + for (i = 0; i < s.nb_streams; i++) { + memcpy(feed->streams[i]->codec, + s.streams[i]->codec, sizeof(AVCodecContext)); + } + av_freep(&s.priv_data); + } + c->buffer_ptr = c->buffer; + } + + return 0; + fail: + c->stream->feed_opened = 0; + close(c->feed_fd); + return -1; +} + +/********************************************************************/ +/* RTSP handling */ + +static void rtsp_reply_header(HTTPContext *c, enum RTSPStatusCode error_number) +{ + const char *str; + time_t ti; + char *p; + char buf2[32]; + + switch(error_number) { +#define DEF(n, c, s) case c: str = s; break; +#include "rtspcodes.h" +#undef DEF + default: + str = "Unknown Error"; + break; + } + + url_fprintf(c->pb, "RTSP/1.0 %d %s\r\n", error_number, str); + url_fprintf(c->pb, "CSeq: %d\r\n", c->seq); + + /* output GMT time 
*/ + ti = time(NULL); + p = ctime(&ti); + strcpy(buf2, p); + p = buf2 + strlen(p) - 1; + if (*p == '\n') + *p = '\0'; + url_fprintf(c->pb, "Date: %s GMT\r\n", buf2); +} + +static void rtsp_reply_error(HTTPContext *c, enum RTSPStatusCode error_number) +{ + rtsp_reply_header(c, error_number); + url_fprintf(c->pb, "\r\n"); +} + +static int rtsp_parse_request(HTTPContext *c) +{ + const char *p, *p1, *p2; + char cmd[32]; + char url[1024]; + char protocol[32]; + char line[1024]; + ByteIOContext pb1; + int len; + RTSPHeader header1, *header = &header1; + + c->buffer_ptr[0] = '\0'; + p = c->buffer; + + get_word(cmd, sizeof(cmd), &p); + get_word(url, sizeof(url), &p); + get_word(protocol, sizeof(protocol), &p); + + pstrcpy(c->method, sizeof(c->method), cmd); + pstrcpy(c->url, sizeof(c->url), url); + pstrcpy(c->protocol, sizeof(c->protocol), protocol); + + c->pb = &pb1; + if (url_open_dyn_buf(c->pb) < 0) { + /* XXX: cannot do more */ + c->pb = NULL; /* safety */ + return -1; + } + + /* check version name */ + if (strcmp(protocol, "RTSP/1.0") != 0) { + rtsp_reply_error(c, RTSP_STATUS_VERSION); + goto the_end; + } + + /* parse each header line */ + memset(header, 0, sizeof(RTSPHeader)); + /* skip to next line */ + while (*p != '\n' && *p != '\0') + p++; + if (*p == '\n') + p++; + while (*p != '\0') { + p1 = strchr(p, '\n'); + if (!p1) + break; + p2 = p1; + if (p2 > p && p2[-1] == '\r') + p2--; + /* skip empty line */ + if (p2 == p) + break; + len = p2 - p; + if (len > sizeof(line) - 1) + len = sizeof(line) - 1; + memcpy(line, p, len); + line[len] = '\0'; + rtsp_parse_line(header, line); + p = p1 + 1; + } + + /* handle sequence number */ + c->seq = header->seq; + + if (!strcmp(cmd, "DESCRIBE")) { + rtsp_cmd_describe(c, url); + } else if (!strcmp(cmd, "OPTIONS")) { + rtsp_cmd_options(c, url); + } else if (!strcmp(cmd, "SETUP")) { + rtsp_cmd_setup(c, url, header); + } else if (!strcmp(cmd, "PLAY")) { + rtsp_cmd_play(c, url, header); + } else if (!strcmp(cmd, "PAUSE")) { + 
rtsp_cmd_pause(c, url, header); + } else if (!strcmp(cmd, "TEARDOWN")) { + rtsp_cmd_teardown(c, url, header); + } else { + rtsp_reply_error(c, RTSP_STATUS_METHOD); + } + the_end: + len = url_close_dyn_buf(c->pb, &c->pb_buffer); + c->pb = NULL; /* safety */ + if (len < 0) { + /* XXX: cannot do more */ + return -1; + } + c->buffer_ptr = c->pb_buffer; + c->buffer_end = c->pb_buffer + len; + c->state = RTSPSTATE_SEND_REPLY; + return 0; +} + +/* XXX: move that to rtsp.c, but would need to replace FFStream by + AVFormatContext */ +static int prepare_sdp_description(FFStream *stream, uint8_t **pbuffer, + struct in_addr my_ip) +{ + ByteIOContext pb1, *pb = &pb1; + int i, payload_type, port, private_payload_type, j; + const char *ipstr, *title, *mediatype; + AVStream *st; + + if (url_open_dyn_buf(pb) < 0) + return -1; + + /* general media info */ + + url_fprintf(pb, "v=0\n"); + ipstr = inet_ntoa(my_ip); + url_fprintf(pb, "o=- 0 0 IN IP4 %s\n", ipstr); + title = stream->title; + if (title[0] == '\0') + title = "No Title"; + url_fprintf(pb, "s=%s\n", title); + if (stream->comment[0] != '\0') + url_fprintf(pb, "i=%s\n", stream->comment); + if (stream->is_multicast) { + url_fprintf(pb, "c=IN IP4 %s\n", inet_ntoa(stream->multicast_ip)); + } + /* for each stream, we output the necessary info */ + private_payload_type = RTP_PT_PRIVATE; + for(i = 0; i < stream->nb_streams; i++) { + st = stream->streams[i]; + if (st->codec->codec_id == CODEC_ID_MPEG2TS) { + mediatype = "video"; + } else { + switch(st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + mediatype = "audio"; + break; + case CODEC_TYPE_VIDEO: + mediatype = "video"; + break; + default: + mediatype = "application"; + break; + } + } + /* NOTE: the port indication is not correct in case of + unicast. 
It is not an issue because RTSP gives it */ + payload_type = rtp_get_payload_type(st->codec); + if (payload_type < 0) + payload_type = private_payload_type++; + if (stream->is_multicast) { + port = stream->multicast_port + 2 * i; + } else { + port = 0; + } + url_fprintf(pb, "m=%s %d RTP/AVP %d\n", + mediatype, port, payload_type); + if (payload_type >= RTP_PT_PRIVATE) { + /* for private payload type, we need to give more info */ + switch(st->codec->codec_id) { + case CODEC_ID_MPEG4: + { + uint8_t *data; + url_fprintf(pb, "a=rtpmap:%d MP4V-ES/%d\n", + payload_type, 90000); + /* we must also add the mpeg4 header */ + data = st->codec->extradata; + if (data) { + url_fprintf(pb, "a=fmtp:%d config=", payload_type); + for(j=0;jcodec->extradata_size;j++) { + url_fprintf(pb, "%02x", data[j]); + } + url_fprintf(pb, "\n"); + } + } + break; + default: + /* XXX: add other codecs ? */ + goto fail; + } + } + url_fprintf(pb, "a=control:streamid=%d\n", i); + } + return url_close_dyn_buf(pb, pbuffer); + fail: + url_close_dyn_buf(pb, pbuffer); + av_free(*pbuffer); + return -1; +} + +static void rtsp_cmd_options(HTTPContext *c, const char *url) +{ +// rtsp_reply_header(c, RTSP_STATUS_OK); + url_fprintf(c->pb, "RTSP/1.0 %d %s\r\n", RTSP_STATUS_OK, "OK"); + url_fprintf(c->pb, "CSeq: %d\r\n", c->seq); + url_fprintf(c->pb, "Public: %s\r\n", "OPTIONS, DESCRIBE, SETUP, TEARDOWN, PLAY, PAUSE"); + url_fprintf(c->pb, "\r\n"); +} + +static void rtsp_cmd_describe(HTTPContext *c, const char *url) +{ + FFStream *stream; + char path1[1024]; + const char *path; + uint8_t *content; + int content_length, len; + struct sockaddr_in my_addr; + + /* find which url is asked */ + url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url); + path = path1; + if (*path == '/') + path++; + + for(stream = first_stream; stream != NULL; stream = stream->next) { + if (!stream->is_feed && stream->fmt == &rtp_mux && + !strcmp(path, stream->filename)) { + goto found; + } + } + /* no stream found */ + 
rtsp_reply_error(c, RTSP_STATUS_SERVICE); /* XXX: right error ? */ + return; + + found: + /* prepare the media description in sdp format */ + + /* get the host IP */ + len = sizeof(my_addr); + getsockname(c->fd, (struct sockaddr *)&my_addr, &len); + content_length = prepare_sdp_description(stream, &content, my_addr.sin_addr); + if (content_length < 0) { + rtsp_reply_error(c, RTSP_STATUS_INTERNAL); + return; + } + rtsp_reply_header(c, RTSP_STATUS_OK); + url_fprintf(c->pb, "Content-Type: application/sdp\r\n"); + url_fprintf(c->pb, "Content-Length: %d\r\n", content_length); + url_fprintf(c->pb, "\r\n"); + put_buffer(c->pb, content, content_length); +} + +static HTTPContext *find_rtp_session(const char *session_id) +{ + HTTPContext *c; + + if (session_id[0] == '\0') + return NULL; + + for(c = first_http_ctx; c != NULL; c = c->next) { + if (!strcmp(c->session_id, session_id)) + return c; + } + return NULL; +} + +static RTSPTransportField *find_transport(RTSPHeader *h, enum RTSPProtocol protocol) +{ + RTSPTransportField *th; + int i; + + for(i=0;inb_transports;i++) { + th = &h->transports[i]; + if (th->protocol == protocol) + return th; + } + return NULL; +} + +static void rtsp_cmd_setup(HTTPContext *c, const char *url, + RTSPHeader *h) +{ + FFStream *stream; + int stream_index, port; + char buf[1024]; + char path1[1024]; + const char *path; + HTTPContext *rtp_c; + RTSPTransportField *th; + struct sockaddr_in dest_addr; + RTSPActionServerSetup setup; + + /* find which url is asked */ + url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url); + path = path1; + if (*path == '/') + path++; + + /* now check each stream */ + for(stream = first_stream; stream != NULL; stream = stream->next) { + if (!stream->is_feed && stream->fmt == &rtp_mux) { + /* accept aggregate filenames only if single stream */ + if (!strcmp(path, stream->filename)) { + if (stream->nb_streams != 1) { + rtsp_reply_error(c, RTSP_STATUS_AGGREGATE); + return; + } + stream_index = 0; + goto 
found; + } + + for(stream_index = 0; stream_index < stream->nb_streams; + stream_index++) { + snprintf(buf, sizeof(buf), "%s/streamid=%d", + stream->filename, stream_index); + if (!strcmp(path, buf)) + goto found; + } + } + } + /* no stream found */ + rtsp_reply_error(c, RTSP_STATUS_SERVICE); /* XXX: right error ? */ + return; + found: + + /* generate session id if needed */ + if (h->session_id[0] == '\0') { + snprintf(h->session_id, sizeof(h->session_id), + "%08x%08x", (int)random(), (int)random()); + } + + /* find rtp session, and create it if none found */ + rtp_c = find_rtp_session(h->session_id); + if (!rtp_c) { + /* always prefer UDP */ + th = find_transport(h, RTSP_PROTOCOL_RTP_UDP); + if (!th) { + th = find_transport(h, RTSP_PROTOCOL_RTP_TCP); + if (!th) { + rtsp_reply_error(c, RTSP_STATUS_TRANSPORT); + return; + } + } + + rtp_c = rtp_new_connection(&c->from_addr, stream, h->session_id, + th->protocol); + if (!rtp_c) { + rtsp_reply_error(c, RTSP_STATUS_BANDWIDTH); + return; + } + + /* open input stream */ + if (open_input_stream(rtp_c, "") < 0) { + rtsp_reply_error(c, RTSP_STATUS_INTERNAL); + return; + } + } + + /* test if stream is OK (test needed because several SETUP needs + to be done for a given file) */ + if (rtp_c->stream != stream) { + rtsp_reply_error(c, RTSP_STATUS_SERVICE); + return; + } + + /* test if stream is already set up */ + if (rtp_c->rtp_ctx[stream_index]) { + rtsp_reply_error(c, RTSP_STATUS_STATE); + return; + } + + /* check transport */ + th = find_transport(h, rtp_c->rtp_protocol); + if (!th || (th->protocol == RTSP_PROTOCOL_RTP_UDP && + th->client_port_min <= 0)) { + rtsp_reply_error(c, RTSP_STATUS_TRANSPORT); + return; + } + + /* setup default options */ + setup.transport_option[0] = '\0'; + dest_addr = rtp_c->from_addr; + dest_addr.sin_port = htons(th->client_port_min); + + /* add transport option if needed */ + if (ff_rtsp_callback) { + setup.ipaddr = ntohl(dest_addr.sin_addr.s_addr); + if 
(ff_rtsp_callback(RTSP_ACTION_SERVER_SETUP, rtp_c->session_id, + (char *)&setup, sizeof(setup), + stream->rtsp_option) < 0) { + rtsp_reply_error(c, RTSP_STATUS_TRANSPORT); + return; + } + dest_addr.sin_addr.s_addr = htonl(setup.ipaddr); + } + + /* setup stream */ + if (rtp_new_av_stream(rtp_c, stream_index, &dest_addr, c) < 0) { + rtsp_reply_error(c, RTSP_STATUS_TRANSPORT); + return; + } + + /* now everything is OK, so we can send the connection parameters */ + rtsp_reply_header(c, RTSP_STATUS_OK); + /* session ID */ + url_fprintf(c->pb, "Session: %s\r\n", rtp_c->session_id); + + switch(rtp_c->rtp_protocol) { + case RTSP_PROTOCOL_RTP_UDP: + port = rtp_get_local_port(rtp_c->rtp_handles[stream_index]); + url_fprintf(c->pb, "Transport: RTP/AVP/UDP;unicast;" + "client_port=%d-%d;server_port=%d-%d", + th->client_port_min, th->client_port_min + 1, + port, port + 1); + break; + case RTSP_PROTOCOL_RTP_TCP: + url_fprintf(c->pb, "Transport: RTP/AVP/TCP;interleaved=%d-%d", + stream_index * 2, stream_index * 2 + 1); + break; + default: + break; + } + if (setup.transport_option[0] != '\0') { + url_fprintf(c->pb, ";%s", setup.transport_option); + } + url_fprintf(c->pb, "\r\n"); + + + url_fprintf(c->pb, "\r\n"); +} + + +/* find an rtp connection by using the session ID. 
Check consistency + with filename */ +static HTTPContext *find_rtp_session_with_url(const char *url, + const char *session_id) +{ + HTTPContext *rtp_c; + char path1[1024]; + const char *path; + char buf[1024]; + int s; + + rtp_c = find_rtp_session(session_id); + if (!rtp_c) + return NULL; + + /* find which url is asked */ + url_split(NULL, 0, NULL, 0, NULL, 0, NULL, path1, sizeof(path1), url); + path = path1; + if (*path == '/') + path++; + if(!strcmp(path, rtp_c->stream->filename)) return rtp_c; + for(s=0; sstream->nb_streams; ++s) { + snprintf(buf, sizeof(buf), "%s/streamid=%d", + rtp_c->stream->filename, s); + if(!strncmp(path, buf, sizeof(buf))) { + // XXX: Should we reply with RTSP_STATUS_ONLY_AGGREGATE if nb_streams>1? + return rtp_c; + } + } + return NULL; +} + +static void rtsp_cmd_play(HTTPContext *c, const char *url, RTSPHeader *h) +{ + HTTPContext *rtp_c; + + rtp_c = find_rtp_session_with_url(url, h->session_id); + if (!rtp_c) { + rtsp_reply_error(c, RTSP_STATUS_SESSION); + return; + } + + if (rtp_c->state != HTTPSTATE_SEND_DATA && + rtp_c->state != HTTPSTATE_WAIT_FEED && + rtp_c->state != HTTPSTATE_READY) { + rtsp_reply_error(c, RTSP_STATUS_STATE); + return; + } + +#if 0 + /* XXX: seek in stream */ + if (h->range_start != AV_NOPTS_VALUE) { + printf("range_start=%0.3f\n", (double)h->range_start / AV_TIME_BASE); + av_seek_frame(rtp_c->fmt_in, -1, h->range_start); + } +#endif + + rtp_c->state = HTTPSTATE_SEND_DATA; + + /* now everything is OK, so we can send the connection parameters */ + rtsp_reply_header(c, RTSP_STATUS_OK); + /* session ID */ + url_fprintf(c->pb, "Session: %s\r\n", rtp_c->session_id); + url_fprintf(c->pb, "\r\n"); +} + +static void rtsp_cmd_pause(HTTPContext *c, const char *url, RTSPHeader *h) +{ + HTTPContext *rtp_c; + + rtp_c = find_rtp_session_with_url(url, h->session_id); + if (!rtp_c) { + rtsp_reply_error(c, RTSP_STATUS_SESSION); + return; + } + + if (rtp_c->state != HTTPSTATE_SEND_DATA && + rtp_c->state != HTTPSTATE_WAIT_FEED) { + 
rtsp_reply_error(c, RTSP_STATUS_STATE); + return; + } + + rtp_c->state = HTTPSTATE_READY; + rtp_c->first_pts = AV_NOPTS_VALUE; + /* now everything is OK, so we can send the connection parameters */ + rtsp_reply_header(c, RTSP_STATUS_OK); + /* session ID */ + url_fprintf(c->pb, "Session: %s\r\n", rtp_c->session_id); + url_fprintf(c->pb, "\r\n"); +} + +static void rtsp_cmd_teardown(HTTPContext *c, const char *url, RTSPHeader *h) +{ + HTTPContext *rtp_c; + + rtp_c = find_rtp_session_with_url(url, h->session_id); + if (!rtp_c) { + rtsp_reply_error(c, RTSP_STATUS_SESSION); + return; + } + + /* abort the session */ + close_connection(rtp_c); + + if (ff_rtsp_callback) { + ff_rtsp_callback(RTSP_ACTION_SERVER_TEARDOWN, rtp_c->session_id, + NULL, 0, + rtp_c->stream->rtsp_option); + } + + /* now everything is OK, so we can send the connection parameters */ + rtsp_reply_header(c, RTSP_STATUS_OK); + /* session ID */ + url_fprintf(c->pb, "Session: %s\r\n", rtp_c->session_id); + url_fprintf(c->pb, "\r\n"); +} + + +/********************************************************************/ +/* RTP handling */ + +static HTTPContext *rtp_new_connection(struct sockaddr_in *from_addr, + FFStream *stream, const char *session_id, + enum RTSPProtocol rtp_protocol) +{ + HTTPContext *c = NULL; + const char *proto_str; + + /* XXX: should output a warning page when coming + close to the connection limit */ + if (nb_connections >= nb_max_connections) + goto fail; + + /* add a new connection */ + c = av_mallocz(sizeof(HTTPContext)); + if (!c) + goto fail; + + c->fd = -1; + c->poll_entry = NULL; + c->from_addr = *from_addr; + c->buffer_size = IOBUFFER_INIT_SIZE; + c->buffer = av_malloc(c->buffer_size); + if (!c->buffer) + goto fail; + nb_connections++; + c->stream = stream; + pstrcpy(c->session_id, sizeof(c->session_id), session_id); + c->state = HTTPSTATE_READY; + c->is_packetized = 1; + c->rtp_protocol = rtp_protocol; + + /* protocol is shown in statistics */ + switch(c->rtp_protocol) { + case 
RTSP_PROTOCOL_RTP_UDP_MULTICAST: + proto_str = "MCAST"; + break; + case RTSP_PROTOCOL_RTP_UDP: + proto_str = "UDP"; + break; + case RTSP_PROTOCOL_RTP_TCP: + proto_str = "TCP"; + break; + default: + proto_str = "???"; + break; + } + pstrcpy(c->protocol, sizeof(c->protocol), "RTP/"); + pstrcat(c->protocol, sizeof(c->protocol), proto_str); + + current_bandwidth += stream->bandwidth; + + c->next = first_http_ctx; + first_http_ctx = c; + return c; + + fail: + if (c) { + av_free(c->buffer); + av_free(c); + } + return NULL; +} + +/* add a new RTP stream in an RTP connection (used in RTSP SETUP + command). If RTP/TCP protocol is used, TCP connection 'rtsp_c' is + used. */ +static int rtp_new_av_stream(HTTPContext *c, + int stream_index, struct sockaddr_in *dest_addr, + HTTPContext *rtsp_c) +{ + AVFormatContext *ctx; + AVStream *st; + char *ipaddr; + URLContext *h; + uint8_t *dummy_buf; + char buf2[32]; + int max_packet_size; + + /* now we can open the relevant output stream */ + ctx = av_alloc_format_context(); + if (!ctx) + return -1; + ctx->oformat = &rtp_mux; + + st = av_mallocz(sizeof(AVStream)); + if (!st) + goto fail; + st->codec= avcodec_alloc_context(); + ctx->nb_streams = 1; + ctx->streams[0] = st; + + if (!c->stream->feed || + c->stream->feed == c->stream) { + memcpy(st, c->stream->streams[stream_index], sizeof(AVStream)); + } else { + memcpy(st, + c->stream->feed->streams[c->stream->feed_streams[stream_index]], + sizeof(AVStream)); + } + + /* build destination RTP address */ + ipaddr = inet_ntoa(dest_addr->sin_addr); + + switch(c->rtp_protocol) { + case RTSP_PROTOCOL_RTP_UDP: + case RTSP_PROTOCOL_RTP_UDP_MULTICAST: + /* RTP/UDP case */ + + /* XXX: also pass as parameter to function ? 
*/ + if (c->stream->is_multicast) { + int ttl; + ttl = c->stream->multicast_ttl; + if (!ttl) + ttl = 16; + snprintf(ctx->filename, sizeof(ctx->filename), + "rtp://%s:%d?multicast=1&ttl=%d", + ipaddr, ntohs(dest_addr->sin_port), ttl); + } else { + snprintf(ctx->filename, sizeof(ctx->filename), + "rtp://%s:%d", ipaddr, ntohs(dest_addr->sin_port)); + } + + if (url_open(&h, ctx->filename, URL_WRONLY) < 0) + goto fail; + c->rtp_handles[stream_index] = h; + max_packet_size = url_get_max_packet_size(h); + break; + case RTSP_PROTOCOL_RTP_TCP: + /* RTP/TCP case */ + c->rtsp_c = rtsp_c; + max_packet_size = RTSP_TCP_MAX_PACKET_SIZE; + break; + default: + goto fail; + } + + http_log("%s:%d - - [%s] \"PLAY %s/streamid=%d %s\"\n", + ipaddr, ntohs(dest_addr->sin_port), + ctime1(buf2), + c->stream->filename, stream_index, c->protocol); + + /* normally, no packets should be output here, but the packet size may be checked */ + if (url_open_dyn_packet_buf(&ctx->pb, max_packet_size) < 0) { + /* XXX: close stream */ + goto fail; + } + av_set_parameters(ctx, NULL); + if (av_write_header(ctx) < 0) { + fail: + if (h) + url_close(h); + av_free(ctx); + return -1; + } + url_close_dyn_buf(&ctx->pb, &dummy_buf); + av_free(dummy_buf); + + c->rtp_ctx[stream_index] = ctx; + return 0; +} + +/********************************************************************/ +/* ffserver initialization */ + +static AVStream *add_av_stream1(FFStream *stream, AVCodecContext *codec) +{ + AVStream *fst; + + fst = av_mallocz(sizeof(AVStream)); + if (!fst) + return NULL; + fst->codec= avcodec_alloc_context(); + fst->priv_data = av_mallocz(sizeof(FeedData)); + memcpy(fst->codec, codec, sizeof(AVCodecContext)); + fst->codec->coded_frame = &dummy_frame; + fst->index = stream->nb_streams; + av_set_pts_info(fst, 33, 1, 90000); + stream->streams[stream->nb_streams++] = fst; + return fst; +} + +/* return the stream number in the feed */ +static int add_av_stream(FFStream *feed, AVStream *st) +{ + AVStream *fst; + 
AVCodecContext *av, *av1; + int i; + + av = st->codec; + for(i=0;inb_streams;i++) { + st = feed->streams[i]; + av1 = st->codec; + if (av1->codec_id == av->codec_id && + av1->codec_type == av->codec_type && + av1->bit_rate == av->bit_rate) { + + switch(av->codec_type) { + case CODEC_TYPE_AUDIO: + if (av1->channels == av->channels && + av1->sample_rate == av->sample_rate) + goto found; + break; + case CODEC_TYPE_VIDEO: + if (av1->width == av->width && + av1->height == av->height && + av1->time_base.den == av->time_base.den && + av1->time_base.num == av->time_base.num && + av1->gop_size == av->gop_size) + goto found; + break; + default: + av_abort(); + } + } + } + + fst = add_av_stream1(feed, av); + if (!fst) + return -1; + return feed->nb_streams - 1; + found: + return i; +} + +static void remove_stream(FFStream *stream) +{ + FFStream **ps; + ps = &first_stream; + while (*ps != NULL) { + if (*ps == stream) { + *ps = (*ps)->next; + } else { + ps = &(*ps)->next; + } + } +} + +/* specific mpeg4 handling : we extract the raw parameters */ +static void extract_mpeg4_header(AVFormatContext *infile) +{ + int mpeg4_count, i, size; + AVPacket pkt; + AVStream *st; + const uint8_t *p; + + mpeg4_count = 0; + for(i=0;inb_streams;i++) { + st = infile->streams[i]; + if (st->codec->codec_id == CODEC_ID_MPEG4 && + st->codec->extradata_size == 0) { + mpeg4_count++; + } + } + if (!mpeg4_count) + return; + + printf("MPEG4 without extra data: trying to find header in %s\n", infile->filename); + while (mpeg4_count > 0) { + if (av_read_packet(infile, &pkt) < 0) + break; + st = infile->streams[pkt.stream_index]; + if (st->codec->codec_id == CODEC_ID_MPEG4 && + st->codec->extradata_size == 0) { + av_freep(&st->codec->extradata); + /* fill extradata with the header */ + /* XXX: we make hard suppositions here ! 
*/ + p = pkt.data; + while (p < pkt.data + pkt.size - 4) { + /* stop when vop header is found */ + if (p[0] == 0x00 && p[1] == 0x00 && + p[2] == 0x01 && p[3] == 0xb6) { + size = p - pkt.data; + // av_hex_dump(pkt.data, size); + st->codec->extradata = av_malloc(size); + st->codec->extradata_size = size; + memcpy(st->codec->extradata, pkt.data, size); + break; + } + p++; + } + mpeg4_count--; + } + av_free_packet(&pkt); + } +} + +/* compute the needed AVStream for each file */ +static void build_file_streams(void) +{ + FFStream *stream, *stream_next; + AVFormatContext *infile; + int i; + + /* gather all streams */ + for(stream = first_stream; stream != NULL; stream = stream_next) { + stream_next = stream->next; + if (stream->stream_type == STREAM_TYPE_LIVE && + !stream->feed) { + /* the stream comes from a file */ + /* try to open the file */ + /* open stream */ + stream->ap_in = av_mallocz(sizeof(AVFormatParameters)); + if (stream->fmt == &rtp_mux) { + /* specific case : if transport stream output to RTP, + we use a raw transport stream reader */ + stream->ap_in->mpeg2ts_raw = 1; + stream->ap_in->mpeg2ts_compute_pcr = 1; + } + + if (av_open_input_file(&infile, stream->feed_filename, + stream->ifmt, 0, stream->ap_in) < 0) { + http_log("%s not found", stream->feed_filename); + /* remove stream (no need to spend more time on it) */ + fail: + remove_stream(stream); + } else { + /* find all the AVStreams inside and reference them in + 'stream' */ + if (av_find_stream_info(infile) < 0) { + http_log("Could not find codec parameters from '%s'", + stream->feed_filename); + av_close_input_file(infile); + goto fail; + } + extract_mpeg4_header(infile); + + for(i=0;inb_streams;i++) { + add_av_stream1(stream, infile->streams[i]->codec); + } + av_close_input_file(infile); + } + } + } +} + +/* compute the needed AVStream for each feed */ +static void build_feed_streams(void) +{ + FFStream *stream, *feed; + int i; + + /* gather all streams */ + for(stream = first_stream; stream != 
NULL; stream = stream->next) { + feed = stream->feed; + if (feed) { + if (!stream->is_feed) { + /* we handle a stream coming from a feed */ + for(i=0;inb_streams;i++) { + stream->feed_streams[i] = add_av_stream(feed, stream->streams[i]); + } + } + } + } + + /* gather all streams */ + for(stream = first_stream; stream != NULL; stream = stream->next) { + feed = stream->feed; + if (feed) { + if (stream->is_feed) { + for(i=0;inb_streams;i++) { + stream->feed_streams[i] = i; + } + } + } + } + + /* create feed files if needed */ + for(feed = first_feed; feed != NULL; feed = feed->next_feed) { + int fd; + + if (url_exist(feed->feed_filename)) { + /* See if it matches */ + AVFormatContext *s; + int matches = 0; + + if (av_open_input_file(&s, feed->feed_filename, NULL, FFM_PACKET_SIZE, NULL) >= 0) { + /* Now see if it matches */ + if (s->nb_streams == feed->nb_streams) { + matches = 1; + for(i=0;inb_streams;i++) { + AVStream *sf, *ss; + sf = feed->streams[i]; + ss = s->streams[i]; + + if (sf->index != ss->index || + sf->id != ss->id) { + printf("Index & Id do not match for stream %d (%s)\n", + i, feed->feed_filename); + matches = 0; + } else { + AVCodecContext *ccf, *ccs; + + ccf = sf->codec; + ccs = ss->codec; +#define CHECK_CODEC(x) (ccf->x != ccs->x) + + if (CHECK_CODEC(codec) || CHECK_CODEC(codec_type)) { + printf("Codecs do not match for stream %d\n", i); + matches = 0; + } else if (CHECK_CODEC(bit_rate) || CHECK_CODEC(flags)) { + printf("Codec bitrates do not match for stream %d\n", i); + matches = 0; + } else if (ccf->codec_type == CODEC_TYPE_VIDEO) { + if (CHECK_CODEC(time_base.den) || + CHECK_CODEC(time_base.num) || + CHECK_CODEC(width) || + CHECK_CODEC(height)) { + printf("Codec width, height and framerate do not match for stream %d\n", i); + matches = 0; + } + } else if (ccf->codec_type == CODEC_TYPE_AUDIO) { + if (CHECK_CODEC(sample_rate) || + CHECK_CODEC(channels) || + CHECK_CODEC(frame_size)) { + printf("Codec sample_rate, channels, frame_size do not match for 
stream %d\n", i); + matches = 0; + } + } else { + printf("Unknown codec type\n"); + matches = 0; + } + } + if (!matches) { + break; + } + } + } else { + printf("Deleting feed file '%s' as stream counts differ (%d != %d)\n", + feed->feed_filename, s->nb_streams, feed->nb_streams); + } + + av_close_input_file(s); + } else { + printf("Deleting feed file '%s' as it appears to be corrupt\n", + feed->feed_filename); + } + if (!matches) { + if (feed->readonly) { + printf("Unable to delete feed file '%s' as it is marked readonly\n", + feed->feed_filename); + exit(1); + } + unlink(feed->feed_filename); + } + } + if (!url_exist(feed->feed_filename)) { + AVFormatContext s1, *s = &s1; + + if (feed->readonly) { + printf("Unable to create feed file '%s' as it is marked readonly\n", + feed->feed_filename); + exit(1); + } + + /* only write the header of the ffm file */ + if (url_fopen(&s->pb, feed->feed_filename, URL_WRONLY) < 0) { + fprintf(stderr, "Could not open output feed file '%s'\n", + feed->feed_filename); + exit(1); + } + s->oformat = feed->fmt; + s->nb_streams = feed->nb_streams; + for(i=0;inb_streams;i++) { + AVStream *st; + st = feed->streams[i]; + s->streams[i] = st; + } + av_set_parameters(s, NULL); + av_write_header(s); + /* XXX: need better api */ + av_freep(&s->priv_data); + url_fclose(&s->pb); + } + /* get feed size and write index */ + fd = open(feed->feed_filename, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Could not open output feed file '%s'\n", + feed->feed_filename); + exit(1); + } + + feed->feed_write_index = ffm_read_write_index(fd); + feed->feed_size = lseek(fd, 0, SEEK_END); + /* ensure that we do not wrap before the end of file */ + if (feed->feed_max_size && feed->feed_max_size < feed->feed_size) + feed->feed_max_size = feed->feed_size; + + close(fd); + } +} + +/* compute the bandwidth used by each stream */ +static void compute_bandwidth(void) +{ + int bandwidth, i; + FFStream *stream; + + for(stream = first_stream; stream != NULL; stream = 
stream->next) { + bandwidth = 0; + for(i=0;inb_streams;i++) { + AVStream *st = stream->streams[i]; + switch(st->codec->codec_type) { + case CODEC_TYPE_AUDIO: + case CODEC_TYPE_VIDEO: + bandwidth += st->codec->bit_rate; + break; + default: + break; + } + } + stream->bandwidth = (bandwidth + 999) / 1000; + } +} + +static void get_arg(char *buf, int buf_size, const char **pp) +{ + const char *p; + char *q; + int quote; + + p = *pp; + while (isspace(*p)) p++; + q = buf; + quote = 0; + if (*p == '\"' || *p == '\'') + quote = *p++; + for(;;) { + if (quote) { + if (*p == quote) + break; + } else { + if (isspace(*p)) + break; + } + if (*p == '\0') + break; + if ((q - buf) < buf_size - 1) + *q++ = *p; + p++; + } + *q = '\0'; + if (quote && *p == quote) + p++; + *pp = p; +} + +/* add a codec and set the default parameters */ +static void add_codec(FFStream *stream, AVCodecContext *av) +{ + AVStream *st; + + /* compute default parameters */ + switch(av->codec_type) { + case CODEC_TYPE_AUDIO: + if (av->bit_rate == 0) + av->bit_rate = 64000; + if (av->sample_rate == 0) + av->sample_rate = 22050; + if (av->channels == 0) + av->channels = 1; + break; + case CODEC_TYPE_VIDEO: + if (av->bit_rate == 0) + av->bit_rate = 64000; + if (av->time_base.num == 0){ + av->time_base.den = 5; + av->time_base.num = 1; + } + if (av->width == 0 || av->height == 0) { + av->width = 160; + av->height = 128; + } + /* Bitrate tolerance is less for streaming */ + if (av->bit_rate_tolerance == 0) + av->bit_rate_tolerance = av->bit_rate / 4; + if (av->qmin == 0) + av->qmin = 3; + if (av->qmax == 0) + av->qmax = 31; + if (av->max_qdiff == 0) + av->max_qdiff = 3; + av->qcompress = 0.5; + av->qblur = 0.5; + + if (!av->nsse_weight) + av->nsse_weight = 8; + + av->frame_skip_cmp = FF_CMP_DCTMAX; + av->me_method = ME_EPZS; + av->rc_buffer_aggressivity = 1.0; + + if (!av->rc_eq) + av->rc_eq = "tex^qComp"; + if (!av->i_quant_factor) + av->i_quant_factor = -0.8; + if (!av->b_quant_factor) + av->b_quant_factor = 
1.25; + if (!av->b_quant_offset) + av->b_quant_offset = 1.25; + if (!av->rc_max_rate) + av->rc_max_rate = av->bit_rate * 2; + + if (av->rc_max_rate && !av->rc_buffer_size) { + av->rc_buffer_size = av->rc_max_rate; + } + + + break; + default: + av_abort(); + } + + st = av_mallocz(sizeof(AVStream)); + if (!st) + return; + st->codec = avcodec_alloc_context(); + stream->streams[stream->nb_streams++] = st; + memcpy(st->codec, av, sizeof(AVCodecContext)); +} + +static int opt_audio_codec(const char *arg) +{ + AVCodec *p; + + p = first_avcodec; + while (p) { + if (!strcmp(p->name, arg) && p->type == CODEC_TYPE_AUDIO) + break; + p = p->next; + } + if (p == NULL) { + return CODEC_ID_NONE; + } + + return p->id; +} + +static int opt_video_codec(const char *arg) +{ + AVCodec *p; + + p = first_avcodec; + while (p) { + if (!strcmp(p->name, arg) && p->type == CODEC_TYPE_VIDEO) + break; + p = p->next; + } + if (p == NULL) { + return CODEC_ID_NONE; + } + + return p->id; +} + +/* simplistic plugin support */ + +#ifdef CONFIG_HAVE_DLOPEN +void load_module(const char *filename) +{ + void *dll; + void (*init_func)(void); + dll = dlopen(filename, RTLD_NOW); + if (!dll) { + fprintf(stderr, "Could not load module '%s' - %s\n", + filename, dlerror()); + return; + } + + init_func = dlsym(dll, "ffserver_module_init"); + if (!init_func) { + fprintf(stderr, + "%s: init function 'ffserver_module_init()' not found\n", + filename); + dlclose(dll); + } + + init_func(); +} +#endif + +static int parse_ffconfig(const char *filename) +{ + FILE *f; + char line[1024]; + char cmd[64]; + char arg[1024]; + const char *p; + int val, errors, line_num; + FFStream **last_stream, *stream, *redirect; + FFStream **last_feed, *feed; + AVCodecContext audio_enc, video_enc; + int audio_id, video_id; + + f = fopen(filename, "r"); + if (!f) { + perror(filename); + return -1; + } + + errors = 0; + line_num = 0; + first_stream = NULL; + last_stream = &first_stream; + first_feed = NULL; + last_feed = &first_feed; + stream 
= NULL; + feed = NULL; + redirect = NULL; + audio_id = CODEC_ID_NONE; + video_id = CODEC_ID_NONE; + for(;;) { + if (fgets(line, sizeof(line), f) == NULL) + break; + line_num++; + p = line; + while (isspace(*p)) + p++; + if (*p == '\0' || *p == '#') + continue; + + get_arg(cmd, sizeof(cmd), &p); + + if (!strcasecmp(cmd, "Port")) { + get_arg(arg, sizeof(arg), &p); + my_http_addr.sin_port = htons (atoi(arg)); + } else if (!strcasecmp(cmd, "BindAddress")) { + get_arg(arg, sizeof(arg), &p); + if (!inet_aton(arg, &my_http_addr.sin_addr)) { + fprintf(stderr, "%s:%d: Invalid IP address: %s\n", + filename, line_num, arg); + errors++; + } + } else if (!strcasecmp(cmd, "NoDaemon")) { + ffserver_daemon = 0; + } else if (!strcasecmp(cmd, "RTSPPort")) { + get_arg(arg, sizeof(arg), &p); + my_rtsp_addr.sin_port = htons (atoi(arg)); + } else if (!strcasecmp(cmd, "RTSPBindAddress")) { + get_arg(arg, sizeof(arg), &p); + if (!inet_aton(arg, &my_rtsp_addr.sin_addr)) { + fprintf(stderr, "%s:%d: Invalid IP address: %s\n", + filename, line_num, arg); + errors++; + } + } else if (!strcasecmp(cmd, "MaxClients")) { + get_arg(arg, sizeof(arg), &p); + val = atoi(arg); + if (val < 1 || val > HTTP_MAX_CONNECTIONS) { + fprintf(stderr, "%s:%d: Invalid MaxClients: %s\n", + filename, line_num, arg); + errors++; + } else { + nb_max_connections = val; + } + } else if (!strcasecmp(cmd, "MaxBandwidth")) { + get_arg(arg, sizeof(arg), &p); + val = atoi(arg); + if (val < 10 || val > 100000) { + fprintf(stderr, "%s:%d: Invalid MaxBandwidth: %s\n", + filename, line_num, arg); + errors++; + } else { + max_bandwidth = val; + } + } else if (!strcasecmp(cmd, "CustomLog")) { + get_arg(logfilename, sizeof(logfilename), &p); + } else if (!strcasecmp(cmd, "next; + /* add in feed list */ + *last_feed = feed; + last_feed = &feed->next_feed; + + get_arg(feed->filename, sizeof(feed->filename), &p); + q = strrchr(feed->filename, '>'); + if (*q) + *q = '\0'; + feed->fmt = guess_format("ffm", NULL, NULL); + /* defaut feed 
file */ + snprintf(feed->feed_filename, sizeof(feed->feed_filename), + "/tmp/%s.ffm", feed->filename); + feed->feed_max_size = 5 * 1024 * 1024; + feed->is_feed = 1; + feed->feed = feed; /* self feeding :-) */ + } + } else if (!strcasecmp(cmd, "Launch")) { + if (feed) { + int i; + + feed->child_argv = (char **) av_mallocz(64 * sizeof(char *)); + + for (i = 0; i < 62; i++) { + char argbuf[256]; + + get_arg(argbuf, sizeof(argbuf), &p); + if (!argbuf[0]) + break; + + feed->child_argv[i] = av_malloc(strlen(argbuf) + 1); + strcpy(feed->child_argv[i], argbuf); + } + + feed->child_argv[i] = av_malloc(30 + strlen(feed->filename)); + + snprintf(feed->child_argv[i], 30+strlen(feed->filename), + "http://%s:%d/%s", + (my_http_addr.sin_addr.s_addr == INADDR_ANY) ? "127.0.0.1" : + inet_ntoa(my_http_addr.sin_addr), + ntohs(my_http_addr.sin_port), feed->filename); + + if (ffserver_debug) + { + int j; + fprintf(stdout, "Launch commandline: "); + for (j = 0; j <= i; j++) + fprintf(stdout, "%s ", feed->child_argv[j]); + fprintf(stdout, "\n"); + } + } + } else if (!strcasecmp(cmd, "ReadOnlyFile")) { + if (feed) { + get_arg(feed->feed_filename, sizeof(feed->feed_filename), &p); + feed->readonly = 1; + } else if (stream) { + get_arg(stream->feed_filename, sizeof(stream->feed_filename), &p); + } + } else if (!strcasecmp(cmd, "File")) { + if (feed) { + get_arg(feed->feed_filename, sizeof(feed->feed_filename), &p); + } else if (stream) { + get_arg(stream->feed_filename, sizeof(stream->feed_filename), &p); + } + } else if (!strcasecmp(cmd, "FileMaxSize")) { + if (feed) { + const char *p1; + double fsize; + + get_arg(arg, sizeof(arg), &p); + p1 = arg; + fsize = strtod(p1, (char **)&p1); + switch(toupper(*p1)) { + case 'K': + fsize *= 1024; + break; + case 'M': + fsize *= 1024 * 1024; + break; + case 'G': + fsize *= 1024 * 1024 * 1024; + break; + } + feed->feed_max_size = (int64_t)fsize; + } + } else if (!strcasecmp(cmd, "")) { + if (!feed) { + fprintf(stderr, "%s:%d: No corresponding for \n", 
+ filename, line_num); + errors++; +#if 0 + } else { + /* Make sure that we start out clean */ + if (unlink(feed->feed_filename) < 0 + && errno != ENOENT) { + fprintf(stderr, "%s:%d: Unable to clean old feed file '%s': %s\n", + filename, line_num, feed->feed_filename, strerror(errno)); + errors++; + } +#endif + } + feed = NULL; + } else if (!strcasecmp(cmd, "next; + + get_arg(stream->filename, sizeof(stream->filename), &p); + q = strrchr(stream->filename, '>'); + if (*q) + *q = '\0'; + stream->fmt = guess_stream_format(NULL, stream->filename, NULL); + memset(&audio_enc, 0, sizeof(AVCodecContext)); + memset(&video_enc, 0, sizeof(AVCodecContext)); + audio_id = CODEC_ID_NONE; + video_id = CODEC_ID_NONE; + if (stream->fmt) { + audio_id = stream->fmt->audio_codec; + video_id = stream->fmt->video_codec; + } + } + } else if (!strcasecmp(cmd, "Feed")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + FFStream *sfeed; + + sfeed = first_feed; + while (sfeed != NULL) { + if (!strcmp(sfeed->filename, arg)) + break; + sfeed = sfeed->next_feed; + } + if (!sfeed) { + fprintf(stderr, "%s:%d: feed '%s' not defined\n", + filename, line_num, arg); + } else { + stream->feed = sfeed; + } + } + } else if (!strcasecmp(cmd, "Format")) { + get_arg(arg, sizeof(arg), &p); + if (!strcmp(arg, "status")) { + stream->stream_type = STREAM_TYPE_STATUS; + stream->fmt = NULL; + } else { + stream->stream_type = STREAM_TYPE_LIVE; + /* jpeg cannot be used here, so use single frame jpeg */ + if (!strcmp(arg, "jpeg")) + strcpy(arg, "mjpeg"); + stream->fmt = guess_stream_format(arg, NULL, NULL); + if (!stream->fmt) { + fprintf(stderr, "%s:%d: Unknown Format: %s\n", + filename, line_num, arg); + errors++; + } + } + if (stream->fmt) { + audio_id = stream->fmt->audio_codec; + video_id = stream->fmt->video_codec; + } + } else if (!strcasecmp(cmd, "InputFormat")) { + stream->ifmt = av_find_input_format(arg); + if (!stream->ifmt) { + fprintf(stderr, "%s:%d: Unknown input format: %s\n", + filename, line_num, 
arg); + } + } else if (!strcasecmp(cmd, "FaviconURL")) { + if (stream && stream->stream_type == STREAM_TYPE_STATUS) { + get_arg(stream->feed_filename, sizeof(stream->feed_filename), &p); + } else { + fprintf(stderr, "%s:%d: FaviconURL only permitted for status streams\n", + filename, line_num); + errors++; + } + } else if (!strcasecmp(cmd, "Author")) { + if (stream) { + get_arg(stream->author, sizeof(stream->author), &p); + } + } else if (!strcasecmp(cmd, "Comment")) { + if (stream) { + get_arg(stream->comment, sizeof(stream->comment), &p); + } + } else if (!strcasecmp(cmd, "Copyright")) { + if (stream) { + get_arg(stream->copyright, sizeof(stream->copyright), &p); + } + } else if (!strcasecmp(cmd, "Title")) { + if (stream) { + get_arg(stream->title, sizeof(stream->title), &p); + } + } else if (!strcasecmp(cmd, "Preroll")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + stream->prebuffer = atof(arg) * 1000; + } + } else if (!strcasecmp(cmd, "StartSendOnKey")) { + if (stream) { + stream->send_on_key = 1; + } + } else if (!strcasecmp(cmd, "AudioCodec")) { + get_arg(arg, sizeof(arg), &p); + audio_id = opt_audio_codec(arg); + if (audio_id == CODEC_ID_NONE) { + fprintf(stderr, "%s:%d: Unknown AudioCodec: %s\n", + filename, line_num, arg); + errors++; + } + } else if (!strcasecmp(cmd, "VideoCodec")) { + get_arg(arg, sizeof(arg), &p); + video_id = opt_video_codec(arg); + if (video_id == CODEC_ID_NONE) { + fprintf(stderr, "%s:%d: Unknown VideoCodec: %s\n", + filename, line_num, arg); + errors++; + } + } else if (!strcasecmp(cmd, "MaxTime")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + stream->max_time = atof(arg) * 1000; + } + } else if (!strcasecmp(cmd, "AudioBitRate")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + audio_enc.bit_rate = atoi(arg) * 1000; + } + } else if (!strcasecmp(cmd, "AudioChannels")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + audio_enc.channels = atoi(arg); + } + } else if (!strcasecmp(cmd, "AudioSampleRate")) { + 
get_arg(arg, sizeof(arg), &p); + if (stream) { + audio_enc.sample_rate = atoi(arg); + } + } else if (!strcasecmp(cmd, "AudioQuality")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { +// audio_enc.quality = atof(arg) * 1000; + } + } else if (!strcasecmp(cmd, "VideoBitRateRange")) { + if (stream) { + int minrate, maxrate; + + get_arg(arg, sizeof(arg), &p); + + if (sscanf(arg, "%d-%d", &minrate, &maxrate) == 2) { + video_enc.rc_min_rate = minrate * 1000; + video_enc.rc_max_rate = maxrate * 1000; + } else { + fprintf(stderr, "%s:%d: Incorrect format for VideoBitRateRange -- should be -: %s\n", + filename, line_num, arg); + errors++; + } + } + } else if (!strcasecmp(cmd, "Debug")) { + if (stream) { + get_arg(arg, sizeof(arg), &p); + video_enc.debug = strtol(arg,0,0); + } + } else if (!strcasecmp(cmd, "Strict")) { + if (stream) { + get_arg(arg, sizeof(arg), &p); + video_enc.strict_std_compliance = atoi(arg); + } + } else if (!strcasecmp(cmd, "VideoBufferSize")) { + if (stream) { + get_arg(arg, sizeof(arg), &p); + video_enc.rc_buffer_size = atoi(arg) * 8*1024; + } + } else if (!strcasecmp(cmd, "VideoBitRateTolerance")) { + if (stream) { + get_arg(arg, sizeof(arg), &p); + video_enc.bit_rate_tolerance = atoi(arg) * 1000; + } + } else if (!strcasecmp(cmd, "VideoBitRate")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.bit_rate = atoi(arg) * 1000; + } + } else if (!strcasecmp(cmd, "VideoSize")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + parse_image_size(&video_enc.width, &video_enc.height, arg); + if ((video_enc.width % 16) != 0 || + (video_enc.height % 16) != 0) { + fprintf(stderr, "%s:%d: Image size must be a multiple of 16\n", + filename, line_num); + errors++; + } + } + } else if (!strcasecmp(cmd, "VideoFrameRate")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.time_base.num= DEFAULT_FRAME_RATE_BASE; + video_enc.time_base.den = (int)(strtod(arg, NULL) * video_enc.time_base.num); + } + } else if (!strcasecmp(cmd, 
"VideoGopSize")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.gop_size = atoi(arg); + } + } else if (!strcasecmp(cmd, "VideoIntraOnly")) { + if (stream) { + video_enc.gop_size = 1; + } + } else if (!strcasecmp(cmd, "VideoHighQuality")) { + if (stream) { + video_enc.mb_decision = FF_MB_DECISION_BITS; + } + } else if (!strcasecmp(cmd, "Video4MotionVector")) { + if (stream) { + video_enc.mb_decision = FF_MB_DECISION_BITS; //FIXME remove + video_enc.flags |= CODEC_FLAG_4MV; + } + } else if (!strcasecmp(cmd, "BitExact")) { + if (stream) { + video_enc.flags |= CODEC_FLAG_BITEXACT; + } + } else if (!strcasecmp(cmd, "DctFastint")) { + if (stream) { + video_enc.dct_algo = FF_DCT_FASTINT; + } + } else if (!strcasecmp(cmd, "IdctSimple")) { + if (stream) { + video_enc.idct_algo = FF_IDCT_SIMPLE; + } + } else if (!strcasecmp(cmd, "Qscale")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.flags |= CODEC_FLAG_QSCALE; + video_enc.global_quality = FF_QP2LAMBDA * atoi(arg); + } + } else if (!strcasecmp(cmd, "VideoQDiff")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.max_qdiff = atoi(arg); + if (video_enc.max_qdiff < 1 || video_enc.max_qdiff > 31) { + fprintf(stderr, "%s:%d: VideoQDiff out of range\n", + filename, line_num); + errors++; + } + } + } else if (!strcasecmp(cmd, "VideoQMax")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.qmax = atoi(arg); + if (video_enc.qmax < 1 || video_enc.qmax > 31) { + fprintf(stderr, "%s:%d: VideoQMax out of range\n", + filename, line_num); + errors++; + } + } + } else if (!strcasecmp(cmd, "VideoQMin")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.qmin = atoi(arg); + if (video_enc.qmin < 1 || video_enc.qmin > 31) { + fprintf(stderr, "%s:%d: VideoQMin out of range\n", + filename, line_num); + errors++; + } + } + } else if (!strcasecmp(cmd, "LumaElim")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.luma_elim_threshold = atoi(arg); + } + } else if 
(!strcasecmp(cmd, "ChromaElim")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.chroma_elim_threshold = atoi(arg); + } + } else if (!strcasecmp(cmd, "LumiMask")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.lumi_masking = atof(arg); + } + } else if (!strcasecmp(cmd, "DarkMask")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + video_enc.dark_masking = atof(arg); + } + } else if (!strcasecmp(cmd, "NoVideo")) { + video_id = CODEC_ID_NONE; + } else if (!strcasecmp(cmd, "NoAudio")) { + audio_id = CODEC_ID_NONE; + } else if (!strcasecmp(cmd, "ACL")) { + IPAddressACL acl; + struct hostent *he; + + get_arg(arg, sizeof(arg), &p); + if (strcasecmp(arg, "allow") == 0) { + acl.action = IP_ALLOW; + } else if (strcasecmp(arg, "deny") == 0) { + acl.action = IP_DENY; + } else { + fprintf(stderr, "%s:%d: ACL action '%s' is not ALLOW or DENY\n", + filename, line_num, arg); + errors++; + } + + get_arg(arg, sizeof(arg), &p); + + he = gethostbyname(arg); + if (!he) { + fprintf(stderr, "%s:%d: ACL refers to invalid host or ip address '%s'\n", + filename, line_num, arg); + errors++; + } else { + /* Only take the first */ + acl.first.s_addr = ntohl(((struct in_addr *) he->h_addr_list[0])->s_addr); + acl.last = acl.first; + } + + get_arg(arg, sizeof(arg), &p); + + if (arg[0]) { + he = gethostbyname(arg); + if (!he) { + fprintf(stderr, "%s:%d: ACL refers to invalid host or ip address '%s'\n", + filename, line_num, arg); + errors++; + } else { + /* Only take the first */ + acl.last.s_addr = ntohl(((struct in_addr *) he->h_addr_list[0])->s_addr); + } + } + + if (!errors) { + IPAddressACL *nacl = (IPAddressACL *) av_mallocz(sizeof(*nacl)); + IPAddressACL **naclp = 0; + + *nacl = acl; + nacl->next = 0; + + if (stream) { + naclp = &stream->acl; + } else if (feed) { + naclp = &feed->acl; + } else { + fprintf(stderr, "%s:%d: ACL found not in or \n", + filename, line_num); + errors++; + } + + if (naclp) { + while (*naclp) + naclp = &(*naclp)->next; + + *naclp 
= nacl; + } + } + } else if (!strcasecmp(cmd, "RTSPOption")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + av_freep(&stream->rtsp_option); + /* XXX: av_strdup ? */ + stream->rtsp_option = av_malloc(strlen(arg) + 1); + if (stream->rtsp_option) { + strcpy(stream->rtsp_option, arg); + } + } + } else if (!strcasecmp(cmd, "MulticastAddress")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + if (!inet_aton(arg, &stream->multicast_ip)) { + fprintf(stderr, "%s:%d: Invalid IP address: %s\n", + filename, line_num, arg); + errors++; + } + stream->is_multicast = 1; + stream->loop = 1; /* default is looping */ + } + } else if (!strcasecmp(cmd, "MulticastPort")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + stream->multicast_port = atoi(arg); + } + } else if (!strcasecmp(cmd, "MulticastTTL")) { + get_arg(arg, sizeof(arg), &p); + if (stream) { + stream->multicast_ttl = atoi(arg); + } + } else if (!strcasecmp(cmd, "NoLoop")) { + if (stream) { + stream->loop = 0; + } + } else if (!strcasecmp(cmd, "")) { + if (!stream) { + fprintf(stderr, "%s:%d: No corresponding for \n", + filename, line_num); + errors++; + } + if (stream->feed && stream->fmt && strcmp(stream->fmt->name, "ffm") != 0) { + if (audio_id != CODEC_ID_NONE) { + audio_enc.codec_type = CODEC_TYPE_AUDIO; + audio_enc.codec_id = audio_id; + add_codec(stream, &audio_enc); + } + if (video_id != CODEC_ID_NONE) { + video_enc.codec_type = CODEC_TYPE_VIDEO; + video_enc.codec_id = video_id; + add_codec(stream, &video_enc); + } + } + stream = NULL; + } else if (!strcasecmp(cmd, "next; + + get_arg(redirect->filename, sizeof(redirect->filename), &p); + q = strrchr(redirect->filename, '>'); + if (*q) + *q = '\0'; + redirect->stream_type = STREAM_TYPE_REDIRECT; + } + } else if (!strcasecmp(cmd, "URL")) { + if (redirect) { + get_arg(redirect->feed_filename, sizeof(redirect->feed_filename), &p); + } + } else if (!strcasecmp(cmd, "")) { + if (!redirect) { + fprintf(stderr, "%s:%d: No corresponding for \n", + filename, 
line_num); + errors++; + } + if (!redirect->feed_filename[0]) { + fprintf(stderr, "%s:%d: No URL found for \n", + filename, line_num); + errors++; + } + redirect = NULL; + } else if (!strcasecmp(cmd, "LoadModule")) { + get_arg(arg, sizeof(arg), &p); +#ifdef CONFIG_HAVE_DLOPEN + load_module(arg); +#else + fprintf(stderr, "%s:%d: Module support not compiled into this version: '%s'\n", + filename, line_num, arg); + errors++; +#endif + } else { + fprintf(stderr, "%s:%d: Incorrect keyword: '%s'\n", + filename, line_num, cmd); + errors++; + } + } + + fclose(f); + if (errors) + return -1; + else + return 0; +} + + +#if 0 +static void write_packet(FFCodec *ffenc, + uint8_t *buf, int size) +{ + PacketHeader hdr; + AVCodecContext *enc = &ffenc->enc; + uint8_t *wptr; + mk_header(&hdr, enc, size); + wptr = http_fifo.wptr; + fifo_write(&http_fifo, (uint8_t *)&hdr, sizeof(hdr), &wptr); + fifo_write(&http_fifo, buf, size, &wptr); + /* atomic modification of wptr */ + http_fifo.wptr = wptr; + ffenc->data_count += size; + ffenc->avg_frame_size = ffenc->avg_frame_size * AVG_COEF + size * (1.0 - AVG_COEF); +} +#endif + +static void show_banner(void) +{ + printf("ffserver version " FFMPEG_VERSION ", Copyright (c) 2000-2003 Fabrice Bellard\n"); +} + +static void show_help(void) +{ + show_banner(); + printf("usage: ffserver [-L] [-h] [-f configfile]\n" + "Hyper fast multi format Audio/Video streaming server\n" + "\n" + "-L : print the LICENSE\n" + "-h : this help\n" + "-f configfile : use configfile instead of /etc/ffserver.conf\n" + ); +} + +static void show_license(void) +{ + show_banner(); + printf( + "This library is free software; you can redistribute it and/or\n" + "modify it under the terms of the GNU Lesser General Public\n" + "License as published by the Free Software Foundation; either\n" + "version 2 of the License, or (at your option) any later version.\n" + "\n" + "This library is distributed in the hope that it will be useful,\n" + "but WITHOUT ANY WARRANTY; without even 
the implied warranty of\n" + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n" + "Lesser General Public License for more details.\n" + "\n" + "You should have received a copy of the GNU Lesser General Public\n" + "License along with this library; if not, write to the Free Software\n" + "Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA\n" + ); +} + +static void handle_child_exit(int sig) +{ + pid_t pid; + int status; + + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { + FFStream *feed; + + for (feed = first_feed; feed; feed = feed->next) { + if (feed->pid == pid) { + int uptime = time(0) - feed->pid_start; + + feed->pid = 0; + fprintf(stderr, "%s: Pid %d exited with status %d after %d seconds\n", feed->filename, pid, status, uptime); + + if (uptime < 30) { + /* Turn off any more restarts */ + feed->child_argv = 0; + } + } + } + } + + need_to_start_children = 1; +} + +int main(int argc, char **argv) +{ + const char *config_filename; + int c; + struct sigaction sigact; + + av_register_all(); + + config_filename = "/etc/ffserver.conf"; + + my_program_name = argv[0]; + my_program_dir = getcwd(0, 0); + ffserver_daemon = 1; + + for(;;) { + c = getopt(argc, argv, "ndLh?f:"); + if (c == -1) + break; + switch(c) { + case 'L': + show_license(); + exit(1); + case '?': + case 'h': + show_help(); + exit(1); + case 'n': + no_launch = 1; + break; + case 'd': + ffserver_debug = 1; + ffserver_daemon = 0; + break; + case 'f': + config_filename = optarg; + break; + default: + exit(2); + } + } + + putenv("http_proxy"); /* Kill the http_proxy */ + + srandom(gettime_ms() + (getpid() << 16)); + + /* address on which the server will handle HTTP connections */ + my_http_addr.sin_family = AF_INET; + my_http_addr.sin_port = htons (8080); + my_http_addr.sin_addr.s_addr = htonl (INADDR_ANY); + + /* address on which the server will handle RTSP connections */ + my_rtsp_addr.sin_family = AF_INET; + my_rtsp_addr.sin_port = htons (5454); + 
my_rtsp_addr.sin_addr.s_addr = htonl (INADDR_ANY); + + nb_max_connections = 5; + max_bandwidth = 1000; + first_stream = NULL; + logfilename[0] = '\0'; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = handle_child_exit; + sigact.sa_flags = SA_NOCLDSTOP | SA_RESTART; + sigaction(SIGCHLD, &sigact, 0); + + if (parse_ffconfig(config_filename) < 0) { + fprintf(stderr, "Incorrect config file - exiting.\n"); + exit(1); + } + + build_file_streams(); + + build_feed_streams(); + + compute_bandwidth(); + + /* put the process in background and detach it from its TTY */ + if (ffserver_daemon) { + int pid; + + pid = fork(); + if (pid < 0) { + perror("fork"); + exit(1); + } else if (pid > 0) { + /* parent : exit */ + exit(0); + } else { + /* child */ + setsid(); + chdir("/"); + close(0); + open("/dev/null", O_RDWR); + if (strcmp(logfilename, "-") != 0) { + close(1); + dup(0); + } + close(2); + dup(0); + } + } + + /* signal init */ + signal(SIGPIPE, SIG_IGN); + + /* open log file if needed */ + if (logfilename[0] != '\0') { + if (!strcmp(logfilename, "-")) + logfile = stdout; + else + logfile = fopen(logfilename, "w"); + } + + if (http_server() < 0) { + fprintf(stderr, "Could not start server\n"); + exit(1); + } + + return 0; +} diff --git a/mpeg4/src/ffserver.h b/mpeg4/src/ffserver.h new file mode 100644 index 0000000000000000000000000000000000000000..868e4cd9b9160cec8107f48a836d90a9dc041bf2 --- /dev/null +++ b/mpeg4/src/ffserver.h @@ -0,0 +1,8 @@ +#ifndef FFSERVER_H +#define FFSERVER_H + +/* interface between ffserver and modules */ + +void ffserver_module_init(void); + +#endif diff --git a/mpeg4/src/libavcodec/.cvsignore b/mpeg4/src/libavcodec/.cvsignore new file mode 100644 index 0000000000000000000000000000000000000000..b6a5f4cf9da973e2ded14ce833af3f510416ed60 --- /dev/null +++ b/mpeg4/src/libavcodec/.cvsignore @@ -0,0 +1,6 @@ +Makefile.* +.depend +amr +amr*_float +apiexample +*-test diff --git a/mpeg4/src/libavcodec/4xm.c b/mpeg4/src/libavcodec/4xm.c new file 
mode 100644 index 0000000000000000000000000000000000000000..3ca2338d243f28c345ece6b29fdad79c2a72e511 --- /dev/null +++ b/mpeg4/src/libavcodec/4xm.c @@ -0,0 +1,753 @@ +/* + * 4XM codec + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file 4xm.c + * 4XM codec. 
+ */ + +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +//#undef NDEBUG +//#include + +#define BLOCK_TYPE_VLC_BITS 5 +#define ACDC_VLC_BITS 9 + +#define CFRAME_BUFFER_COUNT 100 + +static const uint8_t block_type_tab[4][8][2]={ + { //{8,4,2}x{8,4,2} + { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0} + },{ //{8,4}x1 + { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0} + },{ //1x{8,4} + { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0} + },{ //1x2, 2x1 + { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4} + } +}; + +static const uint8_t size2index[4][4]={ + {-1, 3, 1, 1}, + { 3, 0, 0, 0}, + { 2, 0, 0, 0}, + { 2, 0, 0, 0}, +}; + +static const int8_t mv[256][2]={ +{ 0, 0},{ 0, -1},{ -1, 0},{ 1, 0},{ 0, 1},{ -1, -1},{ 1, -1},{ -1, 1}, +{ 1, 1},{ 0, -2},{ -2, 0},{ 2, 0},{ 0, 2},{ -1, -2},{ 1, -2},{ -2, -1}, +{ 2, -1},{ -2, 1},{ 2, 1},{ -1, 2},{ 1, 2},{ -2, -2},{ 2, -2},{ -2, 2}, +{ 2, 2},{ 0, -3},{ -3, 0},{ 3, 0},{ 0, 3},{ -1, -3},{ 1, -3},{ -3, -1}, +{ 3, -1},{ -3, 1},{ 3, 1},{ -1, 3},{ 1, 3},{ -2, -3},{ 2, -3},{ -3, -2}, +{ 3, -2},{ -3, 2},{ 3, 2},{ -2, 3},{ 2, 3},{ 0, -4},{ -4, 0},{ 4, 0}, +{ 0, 4},{ -1, -4},{ 1, -4},{ -4, -1},{ 4, -1},{ 4, 1},{ -1, 4},{ 1, 4}, +{ -3, -3},{ -3, 3},{ 3, 3},{ -2, -4},{ -4, -2},{ 4, -2},{ -4, 2},{ -2, 4}, +{ 2, 4},{ -3, -4},{ 3, -4},{ 4, -3},{ -5, 0},{ -4, 3},{ -3, 4},{ 3, 4}, +{ -1, -5},{ -5, -1},{ -5, 1},{ -1, 5},{ -2, -5},{ 2, -5},{ 5, -2},{ 5, 2}, +{ -4, -4},{ -4, 4},{ -3, -5},{ -5, -3},{ -5, 3},{ 3, 5},{ -6, 0},{ 0, 6}, +{ -6, -1},{ -6, 1},{ 1, 6},{ 2, -6},{ -6, 2},{ 2, 6},{ -5, -4},{ 5, 4}, +{ 4, 5},{ -6, -3},{ 6, 3},{ -7, 0},{ -1, -7},{ 5, -5},{ -7, 1},{ -1, 7}, +{ 4, -6},{ 6, 4},{ -2, -7},{ -7, 2},{ -3, -7},{ 7, -3},{ 3, 7},{ 6, -5}, +{ 0, -8},{ -1, -8},{ -7, -4},{ -8, 1},{ 4, 7},{ 2, -8},{ -2, 8},{ 6, 6}, +{ -8, 3},{ 5, -7},{ -5, 7},{ 8, -4},{ 0, -9},{ -9, -1},{ 1, 9},{ 7, -6}, +{ -7, 6},{ -5, -8},{ -5, 8},{ -9, 3},{ 9, -4},{ 7, -7},{ 8, -6},{ 6, 8}, +{ 10, 1},{-10, 2},{ 9, -5},{ 10, -3},{ 
-8, -7},{-10, -4},{ 6, -9},{-11, 0}, +{ 11, 1},{-11, -2},{ -2, 11},{ 7, -9},{ -7, 9},{ 10, 6},{ -4, 11},{ 8, -9}, +{ 8, 9},{ 5, 11},{ 7,-10},{ 12, -3},{ 11, 6},{ -9, -9},{ 8, 10},{ 5, 12}, +{-11, 7},{ 13, 2},{ 6,-12},{ 10, 9},{-11, 8},{ -7, 12},{ 0, 14},{ 14, -2}, +{ -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{ 5, 14},{-15, -1},{-14, -6},{ 3,-15}, +{ 11,-11},{ -7, 14},{ -5, 15},{ 8,-14},{ 15, 6},{ 3, 16},{ 7,-15},{-16, 5}, +{ 0, 17},{-16, -6},{-10, 14},{-16, 7},{ 12, 13},{-16, 8},{-17, 6},{-18, 3}, +{ -7, 17},{ 15, 11},{ 16, 10},{ 2,-19},{ 3,-19},{-11,-16},{-18, 8},{-19, -6}, +{ 2,-20},{-17,-11},{-10,-18},{ 8, 19},{-21, -1},{-20, 7},{ -4, 21},{ 21, 5}, +{ 15, 16},{ 2,-22},{-10,-20},{-22, 5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5}, +{ 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24}, +{ 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27, 6},{ 1,-28}, +{-11, 26},{-17,-23},{ 7, 28},{ 11,-27},{ 29, 5},{-23,-19},{-28,-11},{-21, 22}, +{-30, 7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27}, +{-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32} +}; + +// this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table +static const uint8_t dequant_table[64]={ + 16, 15, 13, 19, 24, 31, 28, 17, + 17, 23, 25, 31, 36, 63, 45, 21, + 18, 24, 27, 37, 52, 59, 49, 20, + 16, 28, 34, 40, 60, 80, 51, 20, + 18, 31, 48, 66, 68, 86, 56, 21, + 19, 38, 56, 59, 64, 64, 48, 20, + 27, 48, 55, 55, 56, 51, 35, 15, + 20, 35, 34, 32, 31, 22, 15, 8, +}; + +static VLC block_type_vlc[4]; + + +typedef struct CFrameBuffer{ + int allocated_size; + int size; + int id; + uint8_t *data; +}CFrameBuffer; + +typedef struct FourXContext{ + AVCodecContext *avctx; + DSPContext dsp; + AVFrame current_picture, last_picture; + GetBitContext pre_gb; ///< ac/dc prefix + GetBitContext gb; + uint8_t *bytestream; + uint16_t *wordstream; + int mv[256]; + VLC pre_vlc; + int last_dc; + 
DECLARE_ALIGNED_8(DCTELEM, block[6][64]); + uint8_t *bitstream_buffer; + unsigned int bitstream_buffer_size; + CFrameBuffer cfrm[CFRAME_BUFFER_COUNT]; +} FourXContext; + + +#define FIX_1_082392200 70936 +#define FIX_1_414213562 92682 +#define FIX_1_847759065 121095 +#define FIX_2_613125930 171254 + +#define MULTIPLY(var,const) (((var)*(const)) >> 16) + +static void idct(DCTELEM block[64]){ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z5, z10, z11, z12, z13; + int i; + int temp[64]; + + for(i=0; i<8; i++){ + tmp10 = block[8*0 + i] + block[8*4 + i]; + tmp11 = block[8*0 + i] - block[8*4 + i]; + + tmp13 = block[8*2 + i] + block[8*6 + i]; + tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + z13 = block[8*5 + i] + block[8*3 + i]; + z10 = block[8*5 + i] - block[8*3 + i]; + z11 = block[8*1 + i] + block[8*7 + i]; + z12 = block[8*1 + i] - block[8*7 + i]; + + tmp7 = z11 + z13; + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + temp[8*0 + i] = tmp0 + tmp7; + temp[8*7 + i] = tmp0 - tmp7; + temp[8*1 + i] = tmp1 + tmp6; + temp[8*6 + i] = tmp1 - tmp6; + temp[8*2 + i] = tmp2 + tmp5; + temp[8*5 + i] = tmp2 - tmp5; + temp[8*4 + i] = tmp3 + tmp4; + temp[8*3 + i] = tmp3 - tmp4; + } + + for(i=0; i<8*8; i+=8){ + tmp10 = temp[0 + i] + temp[4 + i]; + tmp11 = temp[0 + i] - temp[4 + i]; + + tmp13 = temp[2 + i] + temp[6 + i]; + tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13; + + tmp0 = tmp10 + tmp13; + tmp3 = tmp10 - tmp13; + tmp1 = tmp11 + tmp12; + tmp2 = tmp11 - tmp12; + + z13 = temp[5 + i] + temp[3 + i]; + z10 = temp[5 + i] - temp[3 + i]; + z11 = temp[1 + i] + temp[7 + i]; + z12 = 
temp[1 + i] - temp[7 + i]; + + tmp7 = z11 + z13; + tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); + + z5 = MULTIPLY(z10 + z12, FIX_1_847759065); + tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; + tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; + + tmp6 = tmp12 - tmp7; + tmp5 = tmp11 - tmp6; + tmp4 = tmp10 + tmp5; + + block[0 + i] = (tmp0 + tmp7)>>6; + block[7 + i] = (tmp0 - tmp7)>>6; + block[1 + i] = (tmp1 + tmp6)>>6; + block[6 + i] = (tmp1 - tmp6)>>6; + block[2 + i] = (tmp2 + tmp5)>>6; + block[5 + i] = (tmp2 - tmp5)>>6; + block[4 + i] = (tmp3 + tmp4)>>6; + block[3 + i] = (tmp3 - tmp4)>>6; + } +} + +static void init_vlcs(FourXContext *f){ + int i; + + for(i=0; i<4; i++){ + init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, + &block_type_tab[i][0][1], 2, 1, + &block_type_tab[i][0][0], 2, 1, 1); + } +} + +static void init_mv(FourXContext *f){ + int i; + + for(i=0; i<256; i++){ + f->mv[i] = mv[i][0] + mv[i][1]*f->current_picture.linesize[0]/2; + } +} + +static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){ + int i; + dc*= 0x10001; + + switch(log2w){ + case 0: + for(i=0; igb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1); + + assert(code>=0 && code<=6); + + if(code == 0){ + src += f->mv[ *f->bytestream++ ]; + mcdc(dst, src, log2w, h, stride, 1, 0); + }else if(code == 1){ + log2h--; + decode_p_block(f, dst , src , log2w, log2h, stride); + decode_p_block(f, dst + (stride<mv[ *f->bytestream++ ]; + mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++)); + }else if(code == 5){ + mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++)); + }else if(code == 6){ + if(log2w){ + dst[0] = le2me_16(*f->wordstream++); + dst[1] = le2me_16(*f->wordstream++); + }else{ + dst[0 ] = le2me_16(*f->wordstream++); + dst[stride] = le2me_16(*f->wordstream++); + } + } +} + +static int get32(void *p){ + return le2me_32(*(uint32_t*)p); +} + +static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){ + int x, y; + 
const int width= f->avctx->width; + const int height= f->avctx->height; + uint16_t *src= (uint16_t*)f->last_picture.data[0]; + uint16_t *dst= (uint16_t*)f->current_picture.data[0]; + const int stride= f->current_picture.linesize[0]>>1; + const unsigned int bitstream_size= get32(buf+8); + const unsigned int bytestream_size= get32(buf+16); + const unsigned int wordstream_size= get32(buf+12); + + if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length + || bitstream_size > (1<<26) + || bytestream_size > (1<<26) + || wordstream_size > (1<<26) + ){ + av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size, + bitstream_size+ bytestream_size+ wordstream_size - length); + return -1; + } + + f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE); + f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4); + init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size); + + f->wordstream= (uint16_t*)(buf + 20 + bitstream_size); + f->bytestream= buf + 20 + bitstream_size + wordstream_size; + + init_mv(f); + + for(y=0; ygb)+31)/32*4) + av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n", + bitstream_size - (get_bits_count(&f->gb)+31)/32*4, + bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)), + wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size)) + ); + + return 0; +} + +/** + * decode block and dequantize. 
+ * Note this is allmost identical to mjpeg + */ +static int decode_i_block(FourXContext *f, DCTELEM *block){ + int code, i, j, level, val; + + /* DC coef */ + val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); + if (val>>4){ + av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n"); + } + + if(val) + val = get_xbits(&f->gb, val); + + val = val * dequant_table[0] + f->last_dc; + f->last_dc = + block[0] = val; + /* AC coefs */ + i = 1; + for(;;) { + code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3); + + /* EOB */ + if (code == 0) + break; + if (code == 0xf0) { + i += 16; + } else { + level = get_xbits(&f->gb, code & 0xf); + i += code >> 4; + if (i >= 64) { + av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i); + return 0; + } + + j= ff_zigzag_direct[i]; + block[j] = level * dequant_table[j]; + i++; + if (i >= 64) + break; + } + } + + return 0; +} + +static inline void idct_put(FourXContext *f, int x, int y){ + DCTELEM (*block)[64]= f->block; + int stride= f->current_picture.linesize[0]>>1; + int i; + uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x; + + for(i=0; i<4; i++){ + block[i][0] += 0x80*8*8; + idct(block[i]); + } + + if(!(f->avctx->flags&CODEC_FLAG_GRAY)){ + for(i=4; i<6; i++) idct(block[i]); + } + +/* Note transform is: +y= ( 1b + 4g + 2r)/14 +cb=( 3b - 2g - 1r)/14 +cr=(-1b - 4g + 5r)/14 +*/ + for(y=0; y<8; y++){ + for(x=0; x<8; x++){ + DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize + int cb= block[4][x + 8*y]; + int cr= block[5][x + 8*y]; + int cg= (cb + cr)>>1; + int y; + + cb+=cb; + + y = temp[0]; + dst[0 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); + y = temp[1]; + dst[1 ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); + y = temp[8]; + dst[ stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); + y = temp[9]; + dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8); + dst += 2; + } + dst += 2*stride - 2*8; + } +} + +static int 
decode_i_mb(FourXContext *f){ + int i; + + f->dsp.clear_blocks(f->block[0]); + + for(i=0; i<6; i++){ + if(decode_i_block(f, f->block[i]) < 0) + return -1; + } + + return 0; +} + +static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){ + int frequency[512]; + uint8_t flag[512]; + int up[512]; + uint8_t len_tab[257]; + int bits_tab[257]; + int start, end; + uint8_t *ptr= buf; + int j; + + memset(frequency, 0, sizeof(frequency)); + memset(up, -1, sizeof(up)); + + start= *ptr++; + end= *ptr++; + for(;;){ + int i; + + for(i=start; i<=end; i++){ + frequency[i]= *ptr++; +// printf("%d %d %d\n", start, end, frequency[i]); + } + start= *ptr++; + if(start==0) break; + + end= *ptr++; + } + frequency[256]=1; + + while((ptr - buf)&3) ptr++; // 4byte align + +// for(j=0; j<16; j++) +// printf("%2X", ptr[j]); + + for(j=257; j<512; j++){ + int min_freq[2]= {256*256, 256*256}; + int smallest[2]= {0, 0}; + int i; + for(i=0; i 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ? 
+ } + + bits_tab[j]= bits; + len_tab[j]= len; + } + + init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, + len_tab , 1, 1, + bits_tab, 4, 4, 0); + + return ptr; +} + +static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){ + int x, y; + const int width= f->avctx->width; + const int height= f->avctx->height; + uint16_t *dst= (uint16_t*)f->current_picture.data[0]; + const int stride= f->current_picture.linesize[0]>>1; + const unsigned int bitstream_size= get32(buf); + const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8); + unsigned int prestream_size= 4*get32(buf + bitstream_size + 4); + uint8_t *prestream= buf + bitstream_size + 12; + + if(prestream_size + bitstream_size + 12 != length + || bitstream_size > (1<<26) + || prestream_size > (1<<26)){ + av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length); + return -1; + } + + prestream= read_huffman_tables(f, prestream); + + init_get_bits(&f->gb, buf + 4, 8*bitstream_size); + + prestream_size= length + buf - prestream; + + f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE); + f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)prestream, prestream_size/4); + init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size); + + f->last_dc= 0*128*8*8; + + for(y=0; ypre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256) + av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n"); + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + FourXContext * const f = avctx->priv_data; + AVFrame *picture = data; + AVFrame *p, temp; + int i, frame_4cc, frame_size; + + frame_4cc= get32(buf); + if(buf_size != get32(buf+4)+8){ + av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4)); + } + + if(frame_4cc == ff_get_fourcc("cfrm")){ + int free_index=-1; + const int data_size= buf_size - 
20; + const int id= get32(buf+12); + const int whole_size= get32(buf+16); + CFrameBuffer *cfrm; + + for(i=0; icfrm[i].id && f->cfrm[i].id < avctx->frame_number) + av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id); + } + + for(i=0; icfrm[i].id == id) break; + if(f->cfrm[i].size == 0 ) free_index= i; + } + + if(i>=CFRAME_BUFFER_COUNT){ + i= free_index; + f->cfrm[i].id= id; + } + cfrm= &f->cfrm[i]; + + cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE); + + memcpy(cfrm->data + cfrm->size, buf+20, data_size); + cfrm->size += data_size; + + if(cfrm->size >= whole_size){ + buf= cfrm->data; + frame_size= cfrm->size; + + if(id != avctx->frame_number){ + av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number); + } + + cfrm->size= cfrm->id= 0; + frame_4cc= ff_get_fourcc("pfrm"); + }else + return buf_size; + }else{ + buf= buf + 12; + frame_size= buf_size - 12; + } + + temp= f->current_picture; + f->current_picture= f->last_picture; + f->last_picture= temp; + + p= &f->current_picture; + avctx->coded_frame= p; + + avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 1; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + if(frame_4cc == ff_get_fourcc("ifrm")){ + p->pict_type= I_TYPE; + if(decode_i_frame(f, buf, frame_size) < 0) + return -1; + }else if(frame_4cc == ff_get_fourcc("pfrm")){ + p->pict_type= P_TYPE; + if(decode_p_frame(f, buf, frame_size) < 0) + return -1; + }else if(frame_4cc == ff_get_fourcc("snd_")){ + av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size); + }else{ + av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size); + } + +#if 0 +for(i=0; i<20; i++){ + printf("%2X %c ", buf[i], clip(buf[i],16,126)); +} +#endif + + 
p->key_frame= p->pict_type == I_TYPE; + + *picture= *p; + *data_size = sizeof(AVPicture); + + emms_c(); + + return buf_size; +} + + +static void common_init(AVCodecContext *avctx){ + FourXContext * const f = avctx->priv_data; + + dsputil_init(&f->dsp, avctx); + + f->avctx= avctx; +} + +static int decode_init(AVCodecContext *avctx){ + FourXContext * const f = avctx->priv_data; + + common_init(avctx); + init_vlcs(f); + + avctx->pix_fmt= PIX_FMT_RGB565; + + return 0; +} + + +static int decode_end(AVCodecContext *avctx){ + FourXContext * const f = avctx->priv_data; + int i; + + av_freep(&f->bitstream_buffer); + f->bitstream_buffer_size=0; + for(i=0; icfrm[i].data); + f->cfrm[i].allocated_size= 0; + } + free_vlc(&f->pre_vlc); + + return 0; +} + +AVCodec fourxm_decoder = { + "4xm", + CODEC_TYPE_VIDEO, + CODEC_ID_4XM, + sizeof(FourXContext), + decode_init, + NULL, + decode_end, + decode_frame, + /*CODEC_CAP_DR1,*/ +}; + diff --git a/mpeg4/src/libavcodec/8bps.c b/mpeg4/src/libavcodec/8bps.c new file mode 100644 index 0000000000000000000000000000000000000000..b16e3bb56710f7f3a60e1f06ee4db362339354e0 --- /dev/null +++ b/mpeg4/src/libavcodec/8bps.c @@ -0,0 +1,234 @@ +/* + * Quicktime Planar RGB (8BPS) Video Decoder + * Copyright (C) 2003 Roberto Togni + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file 8bps.c + * QT 8BPS Video Decoder by Roberto Togni + * For more information about the 8BPS format, visit: + * http://www.pcisys.net/~melanson/codecs/ + * + * Supports: PAL8 (RGB 8bpp, paletted) + * : BGR24 (RGB 24bpp) (can also output it as RGBA32) + * : RGBA32 (RGB 32bpp, 4th plane is probably alpha and it's ignored) + * + */ + +#include +#include + +#include "common.h" +#include "avcodec.h" + + +const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1}; + +/* + * Decoder context + */ +typedef struct EightBpsContext { + + AVCodecContext *avctx; + AVFrame pic; + + unsigned char planes; + unsigned char planemap[4]; +} EightBpsContext; + + +/* + * + * Decode a frame + * + */ +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) +{ + EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; + unsigned char *encoded = (unsigned char *)buf; + unsigned char *pixptr, *pixptr_end; + unsigned int height = avctx->height; // Real image height + unsigned int dlen, p, row; + unsigned char *lp, *dp; + unsigned char count; + unsigned int px_inc; + unsigned int planes = c->planes; + unsigned char *planemap = c->planemap; + + if(c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + + c->pic.reference = 0; + c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; + if(avctx->get_buffer(avctx, &c->pic) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + /* Set data pointer after line lengths */ + dp = encoded + planes * (height << 1); + + /* Ignore alpha plane, don't know what to do with it */ + if (planes == 4) + planes--; + + px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32); + + for (p = 0; p < planes; p++) { + /* Lines length 
pointer for this plane */ + lp = encoded + p * (height << 1); + + /* Decode a plane */ + for(row = 0; row < height; row++) { + pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; + pixptr_end = pixptr + c->pic.linesize[0]; + dlen = be2me_16(*(unsigned short *)(lp+row*2)); + /* Decode a row of this plane */ + while(dlen > 0) { + if(dp + 1 >= buf+buf_size) return -1; + if ((count = *dp++) <= 127) { + count++; + dlen -= count + 1; + if (pixptr + count * px_inc > pixptr_end) + break; + if(dp + count > buf+buf_size) return -1; + while(count--) { + *pixptr = *dp++; + pixptr += px_inc; + } + } else { + count = 257 - count; + if (pixptr + count * px_inc > pixptr_end) + break; + while(count--) { + *pixptr = *dp; + pixptr += px_inc; + } + dp++; + dlen -= 2; + } + } + } + } + + if (avctx->palctrl) { + memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); + if (avctx->palctrl->palette_changed) { + c->pic.palette_has_changed = 1; + avctx->palctrl->palette_changed = 0; + } else + c->pic.palette_has_changed = 0; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = c->pic; + + /* always report that the buffer was completely consumed */ + return buf_size; +} + + +/* + * + * Init 8BPS decoder + * + */ +static int decode_init(AVCodecContext *avctx) +{ + EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; + + c->avctx = avctx; + avctx->has_b_frames = 0; + + c->pic.data[0] = NULL; + + if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { + return 1; + } + + switch (avctx->bits_per_sample) { + case 8: + avctx->pix_fmt = PIX_FMT_PAL8; + c->planes = 1; + c->planemap[0] = 0; // 1st plane is palette indexes + if (avctx->palctrl == NULL) { + av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n"); + return -1; + } + break; + case 24: + avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24); + c->planes = 3; + c->planemap[0] = 2; // 1st plane is red + c->planemap[1] = 1; // 2nd plane is green + 
c->planemap[2] = 0; // 3rd plane is blue + break; + case 32: + avctx->pix_fmt = PIX_FMT_RGBA32; + c->planes = 4; +#ifdef WORDS_BIGENDIAN + c->planemap[0] = 1; // 1st plane is red + c->planemap[1] = 2; // 2nd plane is green + c->planemap[2] = 3; // 3rd plane is blue + c->planemap[3] = 0; // 4th plane is alpha??? +#else + c->planemap[0] = 2; // 1st plane is red + c->planemap[1] = 1; // 2nd plane is green + c->planemap[2] = 0; // 3rd plane is blue + c->planemap[3] = 3; // 4th plane is alpha??? +#endif + break; + default: + av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample); + return -1; + } + + return 0; +} + + + + +/* + * + * Uninit 8BPS decoder + * + */ +static int decode_end(AVCodecContext *avctx) +{ + EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; + + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + + return 0; +} + + + +AVCodec eightbps_decoder = { + "8bps", + CODEC_TYPE_VIDEO, + CODEC_ID_8BPS, + sizeof(EightBpsContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/Makefile b/mpeg4/src/libavcodec/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d8e246254d3e43f4f9ed3d8f85aaa2a45027a1a4 --- /dev/null +++ b/mpeg4/src/libavcodec/Makefile @@ -0,0 +1,476 @@ +# +# libavcodec Makefile +# (c) 2000-2005 Fabrice Bellard +# +include ../config.mak + +# NOTE: -I.. is needed to include config.h +CFLAGS=$(OPTFLAGS) -DHAVE_AV_CONFIG_H -I.. 
-I$(SRC_PATH)/libavutil -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_GNU_SOURCE $(AMR_CFLAGS) + +OBJS= bitstream.o utils.o mem.o allcodecs.o \ + mpegvideo.o jrevdct.o jfdctfst.o jfdctint.o\ + mpegaudio.o ac3enc.o mjpeg.o resample.o resample2.o dsputil.o \ + motion_est.o imgconvert.o imgresample.o \ + mpeg12.o mpegaudiodec.o pcm.o simple_idct.o \ + ratecontrol.o adpcm.o eval.o error_resilience.o \ + fft.o mdct.o raw.o golomb.o cabac.o\ + dpcm.o adx.o faandct.o parser.o g726.o \ + vp3dsp.o h264idct.o rangecoder.o pnm.o h263.o msmpeg4.o h263dec.o \ + opt.o + +HEADERS = avcodec.h + +ifeq ($(CONFIG_AASC_DECODER),yes) + OBJS+= aasc.o +endif +ifeq ($(CONFIG_ALAC_DECODER),yes) + OBJS+= alac.o +endif +ifneq ($(CONFIG_ASV1_DECODER)$(CONFIG_ASV1_ENCODER)$(CONFIG_ASV2_DECODER)$(CONFIG_ASV2_ENCODER),) + OBJS+= asv1.o +endif +ifeq ($(CONFIG_AVS_DECODER),yes) + OBJS+= avs.o +endif +ifeq ($(CONFIG_CINEPAK_DECODER),yes) + OBJS+= cinepak.o +endif +ifeq ($(CONFIG_COOK_DECODER),yes) + OBJS+= cook.o +endif +ifneq ($(CONFIG_CLJR_DECODER)$(CONFIG_CLJR_ENCODER),) + OBJS+= cljr.o +endif +ifeq ($(CONFIG_CYUV_DECODER),yes) + OBJS+= cyuv.o +endif +ifeq ($(CONFIG_DVBSUB_DECODER),yes) + OBJS+= dvbsubdec.o +endif +ifeq ($(CONFIG_DVBSUB_ENCODER),yes) + OBJS+= dvbsub.o +endif +ifeq ($(CONFIG_DVDSUB_DECODER),yes) + OBJS+= dvdsub.o +endif +ifeq ($(CONFIG_DVDSUB_ENCODER),yes) + OBJS+= dvdsubenc.o +endif +ifneq ($(CONFIG_DVVIDEO_DECODER)$(CONFIG_DVVIDEO_ENCODER),) + OBJS+= dv.o +endif +ifeq ($(CONFIG_EIGHTBPS_DECODER),yes) + OBJS+= 8bps.o +endif +ifneq ($(CONFIG_FFV1_DECODER)$(CONFIG_FFV1_ENCODER),) + OBJS+= ffv1.o +endif +ifeq ($(CONFIG_FLAC_DECODER),yes) + OBJS+= flac.o +endif +ifeq ($(CONFIG_FLIC_DECODER),yes) + OBJS+= flicvideo.o +endif +ifeq ($(CONFIG_FOURXM_DECODER),yes) + OBJS+= 4xm.o +endif +ifeq ($(CONFIG_FRAPS_DECODER),yes) + OBJS+= fraps.o +endif +ifneq ($(CONFIG_H261_DECODER)$(CONFIG_H261_ENCODER),) + OBJS+= h261.o +endif +ifneq ($(CONFIG_H264_DECODER)$(CONFIG_SVQ3_DECODER),) + OBJS+= 
h264.o +endif +ifneq ($(CONFIG_HUFFYUV_DECODER)$(CONFIG_HUFFYUV_ENCODER)$(CONFIG_FFVHUFF_DECODER)$(CONFIG_FFVHUFF_ENCODER),) + OBJS+= huffyuv.o +endif +ifeq ($(CONFIG_IDCIN_DECODER),yes) + OBJS+= idcinvideo.o +endif +ifeq ($(CONFIG_INDEO2_DECODER),yes) + OBJS+= indeo2.o +endif +ifeq ($(CONFIG_INDEO3_DECODER),yes) + OBJS+= indeo3.o +endif +ifeq ($(CONFIG_INTERPLAY_VIDEO_DECODER),yes) + OBJS+= interplayvideo.o +endif +ifeq ($(CONFIG_KMVC_DECODER),yes) + OBJS+= kmvc.o +endif +ifneq ($(CONFIG_MSZH_DECODER)$(CONFIG_ZLIB_DECODER)$(CONFIG_ZLIB_ENCODER),) + OBJS+= lcl.o +endif +ifeq ($(CONFIG_LOCO_DECODER),yes) + OBJS+= loco.o +endif +ifneq ($(CONFIG_MACE3_DECODER)$(CONFIG_MACE6_DECODER),) + OBJS+= mace.o +endif +ifeq ($(CONFIG_MSRLE_DECODER),yes) + OBJS+= msrle.o +endif +ifeq ($(CONFIG_MSVIDEO1_DECODER),yes) + OBJS+= msvideo1.o +endif +ifneq ($(CONFIG_PNG_DECODER)$(CONFIG_PNG_ENCODER),) + OBJS+= png.o +endif +ifeq ($(CONFIG_QDM2_DECODER),yes) + OBJS+= qdm2.o +endif +ifeq ($(CONFIG_QDRAW_DECODER),yes) + OBJS+= qdrw.o +endif +ifeq ($(CONFIG_QPEG_DECODER),yes) + OBJS+= qpeg.o +endif +ifeq ($(CONFIG_QTRLE_DECODER),yes) + OBJS+= qtrle.o +endif +ifeq ($(CONFIG_RA_144_DECODER),yes) + OBJS+= ra144.o +endif +ifeq ($(CONFIG_RA_288_DECODER),yes) + OBJS+= ra288.o +endif +ifeq ($(CONFIG_ROQ_DECODER),yes) + OBJS+= roqvideo.o +endif +ifeq ($(CONFIG_RPZA_DECODER),yes) + OBJS+= rpza.o +endif +ifneq ($(CONFIG_RV10_DECODER)$(CONFIG_RV20_DECODER)$(CONFIG_RV10_ENCODER)$(CONFIG_RV20_ENCODER),) + OBJS+= rv10.o +endif +ifeq ($(CONFIG_SHORTEN_DECODER),yes) + OBJS+= shorten.o +endif +ifneq ($(CONFIG_SMACKER_DECODER)$(CONFIG_SMACKAUD_DECODER),) + OBJS+= smacker.o +endif +ifeq ($(CONFIG_SMC_DECODER),yes) + OBJS+= smc.o +endif +ifneq ($(CONFIG_SNOW_DECODER)$(CONFIG_SNOW_ENCODER),) + OBJS+= snow.o +endif +ifneq ($(CONFIG_SONIC_DECODER)$(CONFIG_SONIC_ENCODER)$(CONFIG_SONIC_LS_ENCODER),) + OBJS+= sonic.o +endif +ifneq ($(CONFIG_SVQ1_DECODER)$(CONFIG_SVQ1_ENCODER),) + OBJS+= svq1.o +endif +ifeq 
($(CONFIG_TRUEMOTION1_DECODER),yes) + OBJS+= truemotion1.o +endif +ifeq ($(CONFIG_TRUEMOTION2_DECODER),yes) + OBJS+= truemotion2.o +endif +ifeq ($(CONFIG_TRUESPEECH_DECODER),yes) + OBJS+= truespeech.o +endif +ifeq ($(CONFIG_TTA_DECODER),yes) + OBJS+= tta.o +endif +ifeq ($(CONFIG_TSCC_DECODER),yes) + OBJS+= tscc.o +endif +ifeq ($(CONFIG_CSCD_DECODER),yes) + OBJS+= cscd.o + OBJS+= lzo.o +endif +ifeq ($(CONFIG_NUV_DECODER),yes) + OBJS+= nuv.o + OBJS+= rtjpeg.o + OBJS+= lzo.o +endif +ifeq ($(CONFIG_ULTI_DECODER),yes) + OBJS+= ulti.o +endif +ifneq ($(CONFIG_VC9_DECODER)$(CONFIG_WMV3_DECODER),) + OBJS+= vc9.o +endif +ifneq ($(CONFIG_VCR1_DECODER)$(CONFIG_VCR1_ENCODER),) + OBJS+= vcr1.o +endif +ifneq ($(CONFIG_VMDVIDEO_DECODER)$(CONFIG_VMDAUDIO_DECODER),) + OBJS+= vmdav.o +endif +ifeq ($(CONFIG_VORBIS_DECODER),yes) + OBJS+= vorbis.o +endif +ifneq ($(CONFIG_VP3_DECODER)$(CONFIG_THEORA_DECODER),) + OBJS+= vp3.o +endif +ifeq ($(CONFIG_VQA_DECODER),yes) + OBJS+= vqavideo.o +endif +ifneq ($(CONFIG_WMAV1_DECODER)$(CONFIG_WMAV2_DECODER),) + OBJS+= wmadec.o +endif +ifeq ($(CONFIG_WNV1_DECODER),yes) + OBJS+= wnv1.o +endif +ifeq ($(CONFIG_WS_SND1_DECODER),yes) + OBJS+= ws-snd1.o +endif +ifneq ($(CONFIG_XAN_WC3_DECODER)$(CONFIG_XAN_WC4_DECODER),) + OBJS+= xan.o +endif +ifeq ($(CONFIG_XL_DECODER),yes) + OBJS+= xl.o +endif +ifeq ($(CONFIG_BMP_DECODER),yes) + OBJS+= bmp.o +endif +ifeq ($(CONFIG_MMVIDEO_DECODER),yes) + OBJS+= mmvideo.o +endif +ifeq ($(CONFIG_ZMBV_DECODER),yes) + OBJS+= zmbv.o +endif + +AMROBJS= +ifeq ($(AMR_NB),yes) +ifeq ($(AMR_NB_FIXED),yes) +AMROBJS= amr.o +AMREXTRALIBS+= amr/*.o +AMRLIBS=amrlibs +CLEANAMR=cleanamr +else +AMROBJS= amr.o +OBJS+= amr_float/sp_dec.o amr_float/sp_enc.o amr_float/interf_dec.o amr_float/interf_enc.o +CLEANAMR=cleanamrfloat +endif +endif + +ifeq ($(HAVE_PTHREADS),yes) +OBJS+= pthread.o +endif + +ifeq ($(HAVE_W32THREADS),yes) +OBJS+= w32thread.o +endif + +ifeq ($(HAVE_OS2THREADS),yes) +OBJS+= os2thread.o +endif + + +ifeq 
($(HAVE_BEOSTHREADS),yes) +OBJS+= beosthread.o +endif + +ifeq ($(AMR_WB),yes) +AMROBJS= amr.o +OBJS+= amrwb_float/dec_acelp.o amrwb_float/dec_dtx.o amrwb_float/dec_gain.o \ + amrwb_float/dec_if.o amrwb_float/dec_lpc.o amrwb_float/dec_main.o \ + amrwb_float/dec_rom.o amrwb_float/dec_util.o amrwb_float/enc_acelp.o \ + amrwb_float/enc_dtx.o amrwb_float/enc_gain.o amrwb_float/enc_if.o \ + amrwb_float/enc_lpc.o amrwb_float/enc_main.o amrwb_float/enc_rom.o \ + amrwb_float/enc_util.o amrwb_float/if_rom.o +endif +OBJS+= $(AMROBJS) +CLEANAMRWB=cleanamrwbfloat +ASM_OBJS= + +ifeq ($(HAVE_XVMC_ACCEL),yes) +OBJS+= xvmcvideo.o +endif + +# currently using liba52 for ac3 decoding +ifeq ($(CONFIG_AC3),yes) +OBJS+= a52dec.o + +# using builtin liba52 or runtime linked liba52.so.0 +ifneq ($(CONFIG_A52BIN),yes) +OBJS+= liba52/bit_allocate.o liba52/bitstream.o liba52/downmix.o \ + liba52/imdct.o liba52/parse.o liba52/crc.o liba52/resample.o +endif +endif + +EXTRALIBS := -L../libavutil -lavutil$(BUILDSUF) $(EXTRALIBS) + +# currently using libdts for dts decoding +ifeq ($(CONFIG_DTS),yes) +OBJS+= dtsdec.o +CFLAGS += $(DTS_INC) +endif + +ifeq ($(CONFIG_FAAD),yes) +OBJS+= faad.o +endif + +ifeq ($(CONFIG_FAAC),yes) +OBJS+= faac.o +endif + +ifeq ($(CONFIG_XVID),yes) +OBJS+= xvidff.o +OBJS+= xvid_rc.o +endif + +ifeq ($(CONFIG_X264),yes) +OBJS+= x264.o +endif + +ifeq ($(CONFIG_MP3LAME),yes) +OBJS += mp3lameaudio.o +endif + +ifeq ($(CONFIG_LIBOGG),yes) +ifeq ($(CONFIG_LIBVORBIS),yes) +OBJS += oggvorbis.o +endif +ifeq ($(CONFIG_LIBTHEORA), yes) +OBJS += oggtheora.o +endif +endif + +ifeq ($(CONFIG_LIBGSM),yes) +OBJS += libgsm.o +endif + +# i386 mmx specific stuff +ifeq ($(TARGET_MMX),yes) +OBJS += i386/fdct_mmx.o i386/cputest.o \ + i386/dsputil_mmx.o i386/mpegvideo_mmx.o \ + i386/idct_mmx.o i386/motion_est_mmx.o \ + i386/simple_idct_mmx.o i386/fft_sse.o i386/vp3dsp_mmx.o \ + i386/vp3dsp_sse2.o i386/fft_3dn.o i386/fft_3dn2.o i386/snowdsp_mmx.o +ifeq ($(CONFIG_GPL),yes) +OBJS += i386/idct_mmx_xvid.o 
+endif +ifdef TARGET_BUILTIN_VECTOR +i386/fft_sse.o: CFLAGS+= -msse +depend: CFLAGS+= -msse +endif +ifdef TARGET_BUILTIN_3DNOW +i386/fft_3dn.o: CFLAGS+= -m3dnow +i386/fft_3dn2.o: CFLAGS+= -march=athlon +endif +endif + +# armv4l specific stuff +ifeq ($(TARGET_ARCH_ARMV4L),yes) +ASM_OBJS += armv4l/jrevdct_arm.o armv4l/simple_idct_arm.o armv4l/dsputil_arm_s.o +OBJS += armv4l/dsputil_arm.o armv4l/mpegvideo_arm.o +ifeq ($(TARGET_IWMMXT),yes) +OBJS += armv4l/dsputil_iwmmxt.o armv4l/mpegvideo_iwmmxt.o +endif +endif + +# sun mediaLib specific stuff +# currently only works when libavcodec is used in mplayer +ifeq ($(HAVE_MLIB),yes) +OBJS += mlib/dsputil_mlib.o +CFLAGS += $(MLIB_INC) +endif + +# Intel IPP specific stuff +# currently only works when libavcodec is used in mplayer +ifeq ($(HAVE_IPP),yes) +CFLAGS += $(IPP_INC) +endif + +# alpha specific stuff +ifeq ($(TARGET_ARCH_ALPHA),yes) +OBJS += alpha/dsputil_alpha.o alpha/mpegvideo_alpha.o \ + alpha/simple_idct_alpha.o alpha/motion_est_alpha.o +ASM_OBJS += alpha/dsputil_alpha_asm.o alpha/motion_est_mvi_asm.o +CFLAGS += -fforce-addr +endif + +ifeq ($(TARGET_ARCH_POWERPC),yes) +OBJS += ppc/dsputil_ppc.o ppc/mpegvideo_ppc.o +endif + +ifeq ($(TARGET_MMI),yes) +OBJS += ps2/dsputil_mmi.o ps2/idct_mmi.o ps2/mpegvideo_mmi.o +endif + +ifeq ($(TARGET_ALTIVEC),yes) +OBJS += ppc/dsputil_altivec.o ppc/mpegvideo_altivec.o ppc/idct_altivec.o \ + ppc/fft_altivec.o ppc/gmc_altivec.o ppc/fdct_altivec.o \ + ppc/dsputil_h264_altivec.o ppc/dsputil_snow_altivec.o +endif + +ifeq ($(TARGET_ARCH_SH4),yes) +OBJS+= sh4/idct_sh4.o sh4/dsputil_sh4.o sh4/dsputil_align.o +endif + +ifeq ($(TARGET_ARCH_SPARC),yes) +OBJS+=sparc/dsputil_vis.o +sparc/%.o: sparc/%.c + $(CC) -mcpu=ultrasparc -mtune=ultrasparc $(CFLAGS) -c -o $@ $< +endif + +NAME=avcodec +SUBDIR=libavcodec +LIBAVUTIL= $(SRC_PATH)/libavutil/$(LIBPREF)avutil$(LIBSUF) +ifeq ($(BUILD_SHARED),yes) +LIBVERSION=$(LAVCVERSION) +LIBMAJOR=$(LAVCMAJOR) +endif +TESTS= imgresample-test dct-test motion-test 
fft-test + +EXTRAOBJS = $(AMREXTRALIBS) + +include $(SRC_PATH)/common.mak + +$(LIB): $(AMRLIBS) + +amrlibs: + $(MAKE) -C amr spclib fipoplib + +tests: apiexample cpuid_test $(TESTS) + +dsputil.o: dsputil.c dsputil.h + +clean:: $(CLEANAMR) + rm -f \ + i386/*.o i386/*~ \ + armv4l/*.o armv4l/*~ \ + mlib/*.o mlib/*~ \ + alpha/*.o alpha/*~ \ + ppc/*.o ppc/*~ \ + ps2/*.o ps2/*~ \ + sh4/*.o sh4/*~ \ + sparc/*.o sparc/*~ \ + liba52/*.o liba52/*~ \ + apiexample $(TESTS) + +cleanamr: + $(MAKE) -C amr clean + +cleanamrfloat: + rm -f amr_float/*.o + +cleanamrwbfloat: + $(MAKE) -C amrwb_float -f makefile.gcc clean + +# api example program +apiexample: apiexample.c $(LIB) + $(CC) $(CFLAGS) -o $@ $< $(LIB) $(LIBAVUTIL) $(EXTRALIBS) + +# cpuid test +cpuid_test: i386/cputest.c + $(CC) $(CFLAGS) -D__TEST__ -o $@ $< + +# testing progs + +imgresample-test: imgresample.c + $(CC) $(CFLAGS) -DTEST -o $@ $^ -lm + +dct-test: dct-test.o fdctref.o $(LIB) + $(CC) -o $@ $^ -lm $(LIBAVUTIL) + +motion-test: motion_test.o $(LIB) + $(CC) -o $@ $^ -lm + +fft-test: fft-test.o $(LIB) + $(CC) -o $@ $^ $(LIBAVUTIL) -lm diff --git a/mpeg4/src/libavcodec/a52dec.c b/mpeg4/src/libavcodec/a52dec.c new file mode 100644 index 0000000000000000000000000000000000000000..5226325ab607ae6f6016416ff582dc6d9385a6b0 --- /dev/null +++ b/mpeg4/src/libavcodec/a52dec.c @@ -0,0 +1,255 @@ +/* + * A52 decoder + * Copyright (c) 2001 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file a52dec.c + * A52 decoder. + */ + +#include "avcodec.h" +#include "liba52/a52.h" + +#ifdef CONFIG_A52BIN +#include +static const char* liba52name = "liba52.so.0"; +#endif + +/** + * liba52 - Copyright (C) Aaron Holtzman + * released under the GPL license. + */ +typedef struct AC3DecodeState { + uint8_t inbuf[4096]; /* input buffer */ + uint8_t *inbuf_ptr; + int frame_size; + int flags; + int channels; + a52_state_t* state; + sample_t* samples; + + /* + * virtual method table + * + * using this function table so the liba52 doesn't + * have to be really linked together with ffmpeg + * and might be linked in runtime - this allows binary + * distribution of ffmpeg library which doens't depend + * on liba52 library - but if user has it installed + * it will be used - user might install such library + * separately + */ + void* handle; + a52_state_t* (*a52_init)(uint32_t mm_accel); + sample_t* (*a52_samples)(a52_state_t * state); + int (*a52_syncinfo)(uint8_t * buf, int * flags, + int * sample_rate, int * bit_rate); + int (*a52_frame)(a52_state_t * state, uint8_t * buf, int * flags, + sample_t * level, sample_t bias); + void (*a52_dynrng)(a52_state_t * state, + sample_t (* call) (sample_t, void *), void * data); + int (*a52_block)(a52_state_t * state); + void (*a52_free)(a52_state_t * state); + +} AC3DecodeState; + +#ifdef CONFIG_A52BIN +static void* dlsymm(void* handle, const char* symbol) +{ + void* f = dlsym(handle, symbol); + if (!f) + av_log( NULL, AV_LOG_ERROR, "A52 Decoder - function '%s' can't be resolved\n", symbol); + return f; +} +#endif + +static int a52_decode_init(AVCodecContext *avctx) +{ + AC3DecodeState *s = avctx->priv_data; + +#ifdef CONFIG_A52BIN + s->handle = dlopen(liba52name, RTLD_LAZY); + if (!s->handle) + 
{ + av_log( avctx, AV_LOG_ERROR, "A52 library %s could not be opened! \n%s\n", liba52name, dlerror()); + return -1; + } + s->a52_init = (a52_state_t* (*)(uint32_t)) dlsymm(s->handle, "a52_init"); + s->a52_samples = (sample_t* (*)(a52_state_t*)) dlsymm(s->handle, "a52_samples"); + s->a52_syncinfo = (int (*)(uint8_t*, int*, int*, int*)) dlsymm(s->handle, "a52_syncinfo"); + s->a52_frame = (int (*)(a52_state_t*, uint8_t*, int*, sample_t*, sample_t)) dlsymm(s->handle, "a52_frame"); + s->a52_block = (int (*)(a52_state_t*)) dlsymm(s->handle, "a52_block"); + s->a52_free = (void (*)(a52_state_t*)) dlsymm(s->handle, "a52_free"); + if (!s->a52_init || !s->a52_samples || !s->a52_syncinfo + || !s->a52_frame || !s->a52_block || !s->a52_free) + { + dlclose(s->handle); + return -1; + } +#else + /* static linked version */ + s->handle = 0; + s->a52_init = a52_init; + s->a52_samples = a52_samples; + s->a52_syncinfo = a52_syncinfo; + s->a52_frame = a52_frame; + s->a52_block = a52_block; + s->a52_free = a52_free; +#endif + s->state = s->a52_init(0); /* later use CPU flags */ + s->samples = s->a52_samples(s->state); + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + + return 0; +} + +/**** the following two functions comes from a52dec */ +static inline int blah (int32_t i) +{ + if (i > 0x43c07fff) + return 32767; + else if (i < 0x43bf8000) + return -32768; + return i - 0x43c00000; +} + +static inline void float_to_int (float * _f, int16_t * s16, int nchannels) +{ + int i, j, c; + int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format + + j = 0; + nchannels *= 256; + for (i = 0; i < 256; i++) { + for (c = 0; c < nchannels; c += 256) + s16[j++] = blah (f[i + c]); + } +} + +/**** end */ + +#define HEADER_SIZE 7 + +static int a52_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AC3DecodeState *s = avctx->priv_data; + uint8_t *buf_ptr; + int flags, i, len; + int sample_rate, bit_rate; + short *out_samples = data; + float level; + 
static const int ac3_channels[8] = { + 2, 1, 2, 3, 3, 4, 4, 5 + }; + + buf_ptr = buf; + while (buf_size > 0) { + len = s->inbuf_ptr - s->inbuf; + if (s->frame_size == 0) { + /* no header seen : find one. We need at least 7 bytes to parse it */ + len = HEADER_SIZE - len; + if (len > buf_size) + len = buf_size; + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) { + len = s->a52_syncinfo(s->inbuf, &s->flags, &sample_rate, &bit_rate); + if (len == 0) { + /* no sync found : move by one byte (inefficient, but simple!) */ + memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); + s->inbuf_ptr--; + } else { + s->frame_size = len; + /* update codec info */ + avctx->sample_rate = sample_rate; + s->channels = ac3_channels[s->flags & 7]; + if (s->flags & A52_LFE) + s->channels++; + if (avctx->channels == 0) + /* No specific number of channel requested */ + avctx->channels = s->channels; + else if (s->channels < avctx->channels) { + av_log(avctx, AV_LOG_ERROR, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. 
(frmsize: %d)\n", s->channels, len); + avctx->channels = s->channels; + } + avctx->bit_rate = bit_rate; + } + } + } else if (len < s->frame_size) { + len = s->frame_size - len; + if (len > buf_size) + len = buf_size; + + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + } else { + flags = s->flags; + if (avctx->channels == 1) + flags = A52_MONO; + else if (avctx->channels == 2) + flags = A52_STEREO; + else + flags |= A52_ADJUST_LEVEL; + level = 1; + if (s->a52_frame(s->state, s->inbuf, &flags, &level, 384)) { + fail: + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + continue; + } + for (i = 0; i < 6; i++) { + if (s->a52_block(s->state)) + goto fail; + float_to_int(s->samples, out_samples + i * 256 * avctx->channels, avctx->channels); + } + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + *data_size = 6 * avctx->channels * 256 * sizeof(int16_t); + break; + } + } + return buf_ptr - buf; +} + +static int a52_decode_end(AVCodecContext *avctx) +{ + AC3DecodeState *s = avctx->priv_data; + s->a52_free(s->state); +#ifdef CONFIG_A52BIN + dlclose(s->handle); +#endif + return 0; +} + +AVCodec ac3_decoder = { + "ac3", + CODEC_TYPE_AUDIO, + CODEC_ID_AC3, + sizeof(AC3DecodeState), + a52_decode_init, + NULL, + a52_decode_end, + a52_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/aasc.c b/mpeg4/src/libavcodec/aasc.c new file mode 100644 index 0000000000000000000000000000000000000000..4622828007beae18b4c5407e05bdf27452190c85 --- /dev/null +++ b/mpeg4/src/libavcodec/aasc.c @@ -0,0 +1,174 @@ +/* + * Autodesc RLE Decoder + * Copyright (C) 2005 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file aasc.c + * Autodesc RLE Video Decoder by Konstantin Shishkov + */ + +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" + +typedef struct AascContext { + AVCodecContext *avctx; + AVFrame frame; +} AascContext; + +#define FETCH_NEXT_STREAM_BYTE() \ + if (stream_ptr >= buf_size) \ + { \ + av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (fetch)\n"); \ + break; \ + } \ + stream_byte = buf[stream_ptr++]; + +static int aasc_decode_init(AVCodecContext *avctx) +{ + AascContext *s = (AascContext *)avctx->priv_data; + + s->avctx = avctx; + + avctx->pix_fmt = PIX_FMT_BGR24; + avctx->has_b_frames = 0; + s->frame.data[0] = NULL; + + return 0; +} + +static int aasc_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AascContext *s = (AascContext *)avctx->priv_data; + int stream_ptr = 4; + unsigned char rle_code; + unsigned char stream_byte; + int pixel_ptr = 0; + int row_dec, row_ptr; + int frame_size; + int i; + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, &s->frame)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + + row_dec = s->frame.linesize[0]; + row_ptr = (s->avctx->height - 1) * row_dec; + frame_size = row_dec * s->avctx->height; + + while (row_ptr >= 0) { + FETCH_NEXT_STREAM_BYTE(); + rle_code = stream_byte; + 
if (rle_code == 0) { + /* fetch the next byte to see how to handle escape code */ + FETCH_NEXT_STREAM_BYTE(); + if (stream_byte == 0) { + /* line is done, goto the next one */ + row_ptr -= row_dec; + pixel_ptr = 0; + } else if (stream_byte == 1) { + /* decode is done */ + break; + } else if (stream_byte == 2) { + /* reposition frame decode coordinates */ + FETCH_NEXT_STREAM_BYTE(); + pixel_ptr += stream_byte; + FETCH_NEXT_STREAM_BYTE(); + row_ptr -= stream_byte * row_dec; + } else { + /* copy pixels from encoded stream */ + if ((pixel_ptr + stream_byte > avctx->width * 3) || + (row_ptr < 0)) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (copy1)\n"); + break; + } + + rle_code = stream_byte; + if (stream_ptr + rle_code > buf_size) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (copy2)\n"); + break; + } + + for (i = 0; i < rle_code; i++) { + FETCH_NEXT_STREAM_BYTE(); + s->frame.data[0][row_ptr + pixel_ptr] = stream_byte; + pixel_ptr++; + } + if (rle_code & 1) + stream_ptr++; + } + } else { + /* decode a run of data */ + if ((pixel_ptr + rle_code > avctx->width * 3) || + (row_ptr < 0)) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (run1)\n"); + break; + } + + FETCH_NEXT_STREAM_BYTE(); + + while(rle_code--) { + s->frame.data[0][row_ptr + pixel_ptr] = stream_byte; + pixel_ptr++; + } + } + } + + /* one last sanity check on the way out */ + if (stream_ptr < buf_size) + av_log(s->avctx, AV_LOG_ERROR, " AASC: ended frame decode with bytes left over (%d < %d)\n", + stream_ptr, buf_size); + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + /* report that the buffer was completely consumed */ + return buf_size; +} + +static int aasc_decode_end(AVCodecContext *avctx) +{ + AascContext *s = (AascContext *)avctx->priv_data; + + /* release the last frame */ + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + +AVCodec aasc_decoder = { + 
"aasc", + CODEC_TYPE_VIDEO, + CODEC_ID_AASC, + sizeof(AascContext), + aasc_decode_init, + NULL, + aasc_decode_end, + aasc_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/ac3.h b/mpeg4/src/libavcodec/ac3.h new file mode 100644 index 0000000000000000000000000000000000000000..f59d16a95e1f9a037a919acde780806952bec558 --- /dev/null +++ b/mpeg4/src/libavcodec/ac3.h @@ -0,0 +1,63 @@ +/* + * Common code between AC3 encoder and decoder + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file ac3.h + * Common code between AC3 encoder and decoder. 
+ */ + +#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */ +#define AC3_MAX_CHANNELS 6 /* including LFE channel */ + +#define NB_BLOCKS 6 /* number of PCM blocks inside an AC3 frame */ +#define AC3_FRAME_SIZE (NB_BLOCKS * 256) + +/* exponent encoding strategy */ +#define EXP_REUSE 0 +#define EXP_NEW 1 + +#define EXP_D15 1 +#define EXP_D25 2 +#define EXP_D45 3 + +typedef struct AC3BitAllocParameters { + int fscod; /* frequency */ + int halfratecod; + int sgain, sdecay, fdecay, dbknee, floor; + int cplfleak, cplsleak; +} AC3BitAllocParameters; + +#if 0 +extern const uint16_t ac3_freqs[3]; +extern const uint16_t ac3_bitratetab[19]; +extern const int16_t ac3_window[256]; +extern const uint8_t sdecaytab[4]; +extern const uint8_t fdecaytab[4]; +extern const uint16_t sgaintab[4]; +extern const uint16_t dbkneetab[4]; +extern const uint16_t floortab[8]; +extern const uint16_t fgaintab[8]; +#endif + +void ac3_common_init(void); +void ac3_parametric_bit_allocation(AC3BitAllocParameters *s, uint8_t *bap, + int8_t *exp, int start, int end, + int snroffset, int fgain, int is_lfe, + int deltbae,int deltnseg, + uint8_t *deltoffst, uint8_t *deltlen, uint8_t *deltba); diff --git a/mpeg4/src/libavcodec/ac3dec.c b/mpeg4/src/libavcodec/ac3dec.c new file mode 100644 index 0000000000000000000000000000000000000000..b6b7852e1ac47b62fb24226dc9c77480c9e361a5 --- /dev/null +++ b/mpeg4/src/libavcodec/ac3dec.c @@ -0,0 +1,182 @@ +/* + * AC3 decoder + * Copyright (c) 2001 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file ac3dec.c + * AC3 decoder. + */ + +//#define DEBUG + +#include "avcodec.h" +#include "libac3/ac3.h" + +/* currently, I use libac3 which is Copyright (C) Aaron Holtzman and + released under the GPL license. I may reimplement it someday... */ +typedef struct AC3DecodeState { + uint8_t inbuf[4096]; /* input buffer */ + uint8_t *inbuf_ptr; + int frame_size; + int flags; + int channels; + ac3_state_t state; +} AC3DecodeState; + +static int ac3_decode_init(AVCodecContext *avctx) +{ + AC3DecodeState *s = avctx->priv_data; + + ac3_init (); + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + return 0; +} + +stream_samples_t samples; + +/**** the following two functions comes from ac3dec */ +static inline int blah (int32_t i) +{ + if (i > 0x43c07fff) + return 32767; + else if (i < 0x43bf8000) + return -32768; + else + return i - 0x43c00000; +} + +static inline void float_to_int (float * _f, int16_t * s16, int nchannels) +{ + int i, j, c; + int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format + + j = 0; + nchannels *= 256; + for (i = 0; i < 256; i++) { + for (c = 0; c < nchannels; c += 256) + s16[j++] = blah (f[i + c]); + } +} + +/**** end */ + +#define HEADER_SIZE 7 + +static int ac3_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AC3DecodeState *s = avctx->priv_data; + uint8_t *buf_ptr; + int flags, i, len; + int sample_rate, bit_rate; + short *out_samples = data; + float level; + static const int ac3_channels[8] = { + 2, 1, 2, 3, 3, 4, 4, 5 + }; + + buf_ptr = buf; + while (buf_size > 0) { + len = s->inbuf_ptr - s->inbuf; + if (s->frame_size == 0) { + /* no header seen : find one. 
We need at least 7 bytes to parse it */ + len = HEADER_SIZE - len; + if (len > buf_size) + len = buf_size; + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) { + len = ac3_syncinfo (s->inbuf, &s->flags, &sample_rate, &bit_rate); + if (len == 0) { + /* no sync found : move by one byte (inefficient, but simple!) */ + memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); + s->inbuf_ptr--; + } else { + s->frame_size = len; + /* update codec info */ + avctx->sample_rate = sample_rate; + s->channels = ac3_channels[s->flags & 7]; + if (s->flags & AC3_LFE) + s->channels++; + if (avctx->channels == 0) + /* No specific number of channel requested */ + avctx->channels = s->channels; + else if (s->channels < avctx->channels) { + av_log( avctx, AV_LOG_INFO, "ac3dec: AC3 Source channels are less than specified: output to %d channels.. (frmsize: %d)\n", s->channels, len); + avctx->channels = s->channels; + } + avctx->bit_rate = bit_rate; + } + } + } else if (len < s->frame_size) { + len = s->frame_size - len; + if (len > buf_size) + len = buf_size; + + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + } else { + flags = s->flags; + if (avctx->channels == 1) + flags = AC3_MONO; + else if (avctx->channels == 2) + flags = AC3_STEREO; + else + flags |= AC3_ADJUST_LEVEL; + level = 1; + if (ac3_frame (&s->state, s->inbuf, &flags, &level, 384)) { + fail: + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + continue; + } + for (i = 0; i < 6; i++) { + if (ac3_block (&s->state)) + goto fail; + float_to_int (*samples, out_samples + i * 256 * avctx->channels, avctx->channels); + } + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + *data_size = 6 * avctx->channels * 256 * sizeof(int16_t); + break; + } + } + return buf_ptr - buf; +} + +static int ac3_decode_end(AVCodecContext *s) +{ + return 0; +} + +AVCodec ac3_decoder = { + "ac3", + CODEC_TYPE_AUDIO, + 
CODEC_ID_AC3, + sizeof(AC3DecodeState), + ac3_decode_init, + NULL, + ac3_decode_end, + ac3_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/ac3enc.c b/mpeg4/src/libavcodec/ac3enc.c new file mode 100644 index 0000000000000000000000000000000000000000..a587c647b5b3fbcace0cf57d4b7a45b5795bf761 --- /dev/null +++ b/mpeg4/src/libavcodec/ac3enc.c @@ -0,0 +1,1602 @@ +/* + * The simplest AC3 encoder + * Copyright (c) 2000 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file ac3enc.c + * The simplest AC3 encoder. 
+ */ +//#define DEBUG +//#define DEBUG_BITALLOC +#include "avcodec.h" +#include "bitstream.h" +#include "crc.h" +#include "ac3.h" + +typedef struct AC3EncodeContext { + PutBitContext pb; + int nb_channels; + int nb_all_channels; + int lfe_channel; + int bit_rate; + unsigned int sample_rate; + unsigned int bsid; + unsigned int frame_size_min; /* minimum frame size in case rounding is necessary */ + unsigned int frame_size; /* current frame size in words */ + unsigned int bits_written; + unsigned int samples_written; + int halfratecod; + unsigned int frmsizecod; + unsigned int fscod; /* frequency */ + unsigned int acmod; + int lfe; + unsigned int bsmod; + short last_samples[AC3_MAX_CHANNELS][256]; + unsigned int chbwcod[AC3_MAX_CHANNELS]; + int nb_coefs[AC3_MAX_CHANNELS]; + + /* bitrate allocation control */ + int sgaincod, sdecaycod, fdecaycod, dbkneecod, floorcod; + AC3BitAllocParameters bit_alloc; + int csnroffst; + int fgaincod[AC3_MAX_CHANNELS]; + int fsnroffst[AC3_MAX_CHANNELS]; + /* mantissa encoding */ + int mant1_cnt, mant2_cnt, mant4_cnt; +} AC3EncodeContext; + +#include "ac3tab.h" + +#define MDCT_NBITS 9 +#define N (1 << MDCT_NBITS) + +/* new exponents are sent if their Norm 1 exceed this number */ +#define EXP_DIFF_THRESHOLD 1000 + +static void fft_init(int ln); + +static inline int16_t fix15(float a) +{ + int v; + v = (int)(a * (float)(1 << 15)); + if (v < -32767) + v = -32767; + else if (v > 32767) + v = 32767; + return v; +} + + +/** + * Generate a Kaiser Window. + */ +static void k_window_init(int alpha, double *window, int n, int iter) +{ + int j, k; + double a, x; + a = alpha * M_PI / n; + a = a*a; + for(k=0; k0; j--) { + window[k] = (window[k] * x / (j*j)) + 1.0; + } + } +} + +/** + * Generate a Kaiser-Bessel Derived Window. 
+ * @param alpha determines window shape + * @param window array to fill with window values + * @param iter number of iterations to use in BesselI0 + */ + +static void kbd_window_init(int alpha, int16_t *out_window, int iter) +{ + int k, n2; + double kwindow[256]; + double window[256]; + + n2 = 256; + k_window_init(alpha, kwindow, n2, iter); + window[0] = kwindow[0]; + for(k=1; k b1) { + a = a - 64; + if (a < 0) a=0; + } + return a; +} + +static inline int calc_lowcomp(int a, int b0, int b1, int bin) +{ + if (bin < 7) { + if ((b0 + 256) == b1) { + a = 384 ; + } else if (b0 > b1) { + a = a - 64; + if (a < 0) a=0; + } + } else if (bin < 20) { + if ((b0 + 256) == b1) { + a = 320 ; + } else if (b0 > b1) { + a= a - 64; + if (a < 0) a=0; + } + } else { + a = a - 128; + if (a < 0) a=0; + } + return a; +} + +/* AC3 bit allocation. The algorithm is the one described in the AC3 + spec. */ +void ac3_parametric_bit_allocation(AC3BitAllocParameters *s, uint8_t *bap, + int8_t *exp, int start, int end, + int snroffset, int fgain, int is_lfe, + int deltbae,int deltnseg, + uint8_t *deltoffst, uint8_t *deltlen, uint8_t *deltba) +{ + int bin,i,j,k,end1,v,v1,bndstrt,bndend,lowcomp,begin; + int fastleak,slowleak,address,tmp; + int16_t psd[256]; /* scaled exponents */ + int16_t bndpsd[50]; /* interpolated exponents */ + int16_t excite[50]; /* excitation */ + int16_t mask[50]; /* masking value */ + + /* exponent mapping to PSD */ + for(bin=start;bin end) end1=end; + for(i=j;i= 0) { + adr=c >> 1; + if (adr > 255) adr=255; + v=v + latab[adr]; + } else { + adr=(-c) >> 1; + if (adr > 255) adr=255; + v=v1 + latab[adr]; + } + j++; + } + bndpsd[k]=v; + k++; + } while (end > bndtab[k]); + + /* excitation function */ + bndstrt = masktab[start]; + bndend = masktab[end-1] + 1; + + if (bndstrt == 0) { + lowcomp = 0; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[0], bndpsd[1]) ; + excite[0] = bndpsd[0] - fgain - lowcomp ; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[1], bndpsd[2]) ; + excite[1] = bndpsd[1] 
- fgain - lowcomp ; + begin = 7 ; + for (bin = 2; bin < 7; bin++) { + if (!(is_lfe && bin == 6)) + lowcomp = calc_lowcomp1(lowcomp, bndpsd[bin], bndpsd[bin+1]) ; + fastleak = bndpsd[bin] - fgain ; + slowleak = bndpsd[bin] - s->sgain ; + excite[bin] = fastleak - lowcomp ; + if (!(is_lfe && bin == 6)) { + if (bndpsd[bin] <= bndpsd[bin+1]) { + begin = bin + 1 ; + break ; + } + } + } + + end1=bndend; + if (end1 > 22) end1=22; + + for (bin = begin; bin < end1; bin++) { + if (!(is_lfe && bin == 6)) + lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin) ; + + fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak - lowcomp; + if (slowleak > v) v=slowleak; + + excite[bin] = v; + } + begin = 22; + } else { + /* coupling channel */ + begin = bndstrt; + + fastleak = (s->cplfleak << 8) + 768; + slowleak = (s->cplsleak << 8) + 768; + } + + for (bin = begin; bin < bndend; bin++) { + fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak; + if (slowleak > v) v = slowleak; + excite[bin] = v; + } + + /* compute masking curve */ + + for (bin = bndstrt; bin < bndend; bin++) { + v1 = excite[bin]; + tmp = s->dbknee - bndpsd[bin]; + if (tmp > 0) { + v1 += tmp >> 2; + } + v=hth[bin >> s->halfratecod][s->fscod]; + if (v1 > v) v=v1; + mask[bin] = v; + } + + /* delta bit allocation */ + + if (deltbae == 0 || deltbae == 1) { + int band, seg, delta; + band = 0 ; + for (seg = 0; seg < deltnseg; seg++) { + band += deltoffst[seg] ; + if (deltba[seg] >= 4) { + delta = (deltba[seg] - 3) << 7; + } else { + delta = (deltba[seg] - 4) << 7; + } + for (k = 0; k < deltlen[seg]; k++) { + mask[band] += delta ; + band++ ; + } + } + } + + /* compute bit allocation */ + + i = start ; + j = masktab[start] ; + do { + v=mask[j]; + v 
-= snroffset ; + v -= s->floor ; + if (v < 0) v = 0; + v &= 0x1fe0 ; + v += s->floor ; + + end1=bndtab[j] + bndsz[j]; + if (end1 > end) end1=end; + + for (k = i; k < end1; k++) { + address = (psd[i] - v) >> 5 ; + if (address < 0) address=0; + else if (address > 63) address=63; + bap[i] = baptab[address]; + i++; + } + } while (end > bndtab[j++]) ; +} + +typedef struct IComplex { + short re,im; +} IComplex; + +static void fft_init(int ln) +{ + int i, j, m, n; + float alpha; + + n = 1 << ln; + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + costab[i] = fix15(cos(alpha)); + sintab[i] = fix15(sin(alpha)); + } + + for(i=0;i> j) & 1) << (ln-j-1); + } + fft_rev[i]=m; + } +} + +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + int ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax) >> 1;\ + pim = (by + ay) >> 1;\ + qre = (bx - ax) >> 1;\ + qim = (by - ay) >> 1;\ +} + +#define MUL16(a,b) ((a) * (b)) + +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;\ + pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15;\ +} + + +/* do a 2^n point complex fft on 2^ln points. */ +static void fft(IComplex *z, int ln) +{ + int j, l, np, np2; + int nblocks, nloops; + register IComplex *p,*q; + int tmp_re, tmp_im; + + np = 1 << ln; + + /* reverse */ + for(j=0;j> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + p=&z[0]; + j=np >> 2; + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +/* do a 512 point mdct */ +static void mdct512(int32_t *out, int16_t *in) +{ + int i, re, im, re1, im1; + int16_t rot[N]; + IComplex x[N/4]; + + /* shift to simplify computations */ + for(i=0;i> 1; + im = -((int)rot[N/2+2*i] - (int)rot[N/2-1-2*i]) >> 1; + CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]); + } + + fft(x, MDCT_NBITS - 2); + + /* post rotation */ + for(i=0;i EXP_DIFF_THRESHOLD) + exp_strategy[i][ch] = EXP_NEW; + else + exp_strategy[i][ch] = EXP_REUSE; + } + if (is_lfe) + return; + + /* now select the encoding strategy type : if exponents are often + recoded, we use a coarse encoding */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) + j++; + switch(j - i) { + case 1: + exp_strategy[i][ch] = EXP_D45; + break; + case 2: + case 3: + exp_strategy[i][ch] = EXP_D25; + break; + default: + exp_strategy[i][ch] = EXP_D15; + break; + } + i = j; + } +} + +/* set exp[i] to min(exp[i], exp1[i]) */ +static void exponent_min(uint8_t exp[N/2], uint8_t exp1[N/2], int n) +{ + int i; + + for(i=0;i= 0 && exp_min <= 24); + for(j=1;j 15) + exp1[0] = 15; + + /* Decrease the delta between each groups to within 2 + * so that they can be differentially encoded */ + for (i=1;i<=nb_groups;i++) + exp1[i] = FFMIN(exp1[i], exp1[i-1] + 2); + for (i=nb_groups-1;i>=0;i--) + exp1[i] = FFMIN(exp1[i], exp1[i+1] + 2); + + /* now we have the exponent values the decoder will see */ + encoded_exp[0] = exp1[0]; + k = 1; 
+ for(i=1;i<=nb_groups;i++) { + for(j=0;jmant1_cnt == 0) + bits += 5; + if (++s->mant1_cnt == 3) + s->mant1_cnt = 0; + break; + case 2: + /* 3 mantissa in 7 bits */ + if (s->mant2_cnt == 0) + bits += 7; + if (++s->mant2_cnt == 3) + s->mant2_cnt = 0; + break; + case 3: + bits += 3; + break; + case 4: + /* 2 mantissa in 7 bits */ + if (s->mant4_cnt == 0) + bits += 7; + if (++s->mant4_cnt == 2) + s->mant4_cnt = 0; + break; + case 14: + bits += 14; + break; + case 15: + bits += 16; + break; + default: + bits += mant - 1; + break; + } + } + return bits; +} + + +static int bit_alloc(AC3EncodeContext *s, + uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits, int csnroffst, int fsnroffst) +{ + int i, ch; + + /* compute size */ + for(i=0;imant1_cnt = 0; + s->mant2_cnt = 0; + s->mant4_cnt = 0; + for(ch=0;chnb_all_channels;ch++) { + ac3_parametric_bit_allocation(&s->bit_alloc, + bap[i][ch], (int8_t *)encoded_exp[i][ch], + 0, s->nb_coefs[ch], + (((csnroffst-15) << 4) + + fsnroffst) << 2, + fgaintab[s->fgaincod[ch]], + ch == s->lfe_channel, + 2, 0, NULL, NULL, NULL); + frame_bits += compute_mantissa_size(s, bap[i][ch], + s->nb_coefs[ch]); + } + } +#if 0 + printf("csnr=%d fsnr=%d frame_bits=%d diff=%d\n", + csnroffst, fsnroffst, frame_bits, + 16 * s->frame_size - ((frame_bits + 7) & ~7)); +#endif + return 16 * s->frame_size - frame_bits; +} + +#define SNR_INC1 4 + +static int compute_bit_allocation(AC3EncodeContext *s, + uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits) +{ + int i, ch; + int csnroffst, fsnroffst; + uint8_t bap1[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + static int frame_bits_inc[8] = { 0, 0, 2, 2, 2, 4, 2, 4 }; + + /* init default parameters */ + s->sdecaycod = 2; + s->fdecaycod = 1; + s->sgaincod = 1; + s->dbkneecod = 2; + 
s->floorcod = 4; + for(ch=0;chnb_all_channels;ch++) + s->fgaincod[ch] = 4; + + /* compute real values */ + s->bit_alloc.fscod = s->fscod; + s->bit_alloc.halfratecod = s->halfratecod; + s->bit_alloc.sdecay = sdecaytab[s->sdecaycod] >> s->halfratecod; + s->bit_alloc.fdecay = fdecaytab[s->fdecaycod] >> s->halfratecod; + s->bit_alloc.sgain = sgaintab[s->sgaincod]; + s->bit_alloc.dbknee = dbkneetab[s->dbkneecod]; + s->bit_alloc.floor = floortab[s->floorcod]; + + /* header size */ + frame_bits += 65; + // if (s->acmod == 2) + // frame_bits += 2; + frame_bits += frame_bits_inc[s->acmod]; + + /* audio blocks */ + for(i=0;inb_channels * 2 + 2; /* blksw * c, dithflag * c, dynrnge, cplstre */ + if (s->acmod == 2) { + frame_bits++; /* rematstr */ + if(i==0) frame_bits += 4; + } + frame_bits += 2 * s->nb_channels; /* chexpstr[2] * c */ + if (s->lfe) + frame_bits++; /* lfeexpstr */ + for(ch=0;chnb_channels;ch++) { + if (exp_strategy[i][ch] != EXP_REUSE) + frame_bits += 6 + 2; /* chbwcod[6], gainrng[2] */ + } + frame_bits++; /* baie */ + frame_bits++; /* snr */ + frame_bits += 2; /* delta / skip */ + } + frame_bits++; /* cplinu for block 0 */ + /* bit alloc info */ + /* sdcycod[2], fdcycod[2], sgaincod[2], dbpbcod[2], floorcod[3] */ + /* csnroffset[6] */ + /* (fsnoffset[4] + fgaincod[4]) * c */ + frame_bits += 2*4 + 3 + 6 + s->nb_all_channels * (4 + 3); + + /* auxdatae, crcrsv */ + frame_bits += 2; + + /* CRC */ + frame_bits += 16; + + /* now the big work begins : do the bit allocation. 
Modify the snr + offset until we can pack everything in the requested frame size */ + + csnroffst = s->csnroffst; + while (csnroffst >= 0 && + bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) + csnroffst -= SNR_INC1; + if (csnroffst < 0) { + av_log(NULL, AV_LOG_ERROR, "Bit allocation failed, try increasing the bitrate, -ab 384 for example!\n"); + return -1; + } + while ((csnroffst + SNR_INC1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst + SNR_INC1, 0) >= 0) { + csnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((csnroffst + 1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, csnroffst + 1, 0) >= 0) { + csnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + fsnroffst = 0; + while ((fsnroffst + SNR_INC1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst, fsnroffst + SNR_INC1) >= 0) { + fsnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((fsnroffst + 1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst, fsnroffst + 1) >= 0) { + fsnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + s->csnroffst = csnroffst; + for(ch=0;chnb_all_channels;ch++) + s->fsnroffst[ch] = fsnroffst; +#if defined(DEBUG_BITALLOC) + { + int j; + + for(i=0;i<6;i++) { + for(ch=0;chnb_all_channels;ch++) { + printf("Block #%d Ch%d:\n", i, ch); + printf("bap="); + for(j=0;jnb_coefs[ch];j++) { + printf("%d ",bap[i][ch][j]); + } + printf("\n"); + } + } + } +#endif + return 0; +} + +void ac3_common_init(void) +{ + int i, j, k, l, v; + /* compute bndtab and masktab from bandsz */ + k = 0; + l = 0; + for(i=0;i<50;i++) { + bndtab[i] = l; + v = bndsz[i]; + for(j=0;jsample_rate; + int bitrate = avctx->bit_rate; + int channels = avctx->channels; + AC3EncodeContext *s = avctx->priv_data; + int i, j, ch; + float alpha; + static const uint8_t acmod_defs[6] = { + 0x01, /* C */ + 0x02, /* L R */ + 0x03, /* L C R */ + 0x06, /* L R SL 
SR */ + 0x07, /* L C R SL SR */ + 0x07, /* L C R SL SR (+LFE) */ + }; + + avctx->frame_size = AC3_FRAME_SIZE; + + /* number of channels */ + if (channels < 1 || channels > 6) + return -1; + s->acmod = acmod_defs[channels - 1]; + s->lfe = (channels == 6) ? 1 : 0; + s->nb_all_channels = channels; + s->nb_channels = channels > 5 ? 5 : channels; + s->lfe_channel = s->lfe ? 5 : -1; + + /* frequency */ + for(i=0;i<3;i++) { + for(j=0;j<3;j++) + if ((ac3_freqs[j] >> i) == freq) + goto found; + } + return -1; + found: + s->sample_rate = freq; + s->halfratecod = i; + s->fscod = j; + s->bsid = 8 + s->halfratecod; + s->bsmod = 0; /* complete main audio service */ + + /* bitrate & frame size */ + bitrate /= 1000; + for(i=0;i<19;i++) { + if ((ac3_bitratetab[i] >> s->halfratecod) == bitrate) + break; + } + if (i == 19) + return -1; + s->bit_rate = bitrate; + s->frmsizecod = i << 1; + s->frame_size_min = (bitrate * 1000 * AC3_FRAME_SIZE) / (freq * 16); + s->bits_written = 0; + s->samples_written = 0; + s->frame_size = s->frame_size_min; + + /* bit allocation init */ + for(ch=0;chnb_channels;ch++) { + /* bandwidth for each channel */ + /* XXX: should compute the bandwidth according to the frame + size, so that we avoid anoying high freq artefacts */ + s->chbwcod[ch] = 50; /* sample bandwidth as mpeg audio layer 2 table 0 */ + s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37; + } + if (s->lfe) { + s->nb_coefs[s->lfe_channel] = 7; /* fixed */ + } + /* initial snr offset */ + s->csnroffst = 40; + + ac3_common_init(); + + /* mdct init */ + kbd_window_init(5.0, ac3_window, 50); + + fft_init(MDCT_NBITS - 2); + for(i=0;icoded_frame= avcodec_alloc_frame(); + avctx->coded_frame->key_frame= 1; + + return 0; +} + +/* output the AC3 frame header */ +static void output_frame_header(AC3EncodeContext *s, unsigned char *frame) +{ + init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE); + + put_bits(&s->pb, 16, 0x0b77); /* frame header */ + put_bits(&s->pb, 16, 0); /* crc1: will be filled later 
*/ + put_bits(&s->pb, 2, s->fscod); + put_bits(&s->pb, 6, s->frmsizecod + (s->frame_size - s->frame_size_min)); + put_bits(&s->pb, 5, s->bsid); + put_bits(&s->pb, 3, s->bsmod); + put_bits(&s->pb, 3, s->acmod); + if ((s->acmod & 0x01) && s->acmod != 0x01) + put_bits(&s->pb, 2, 1); /* XXX -4.5 dB */ + if (s->acmod & 0x04) + put_bits(&s->pb, 2, 1); /* XXX -6 dB */ + if (s->acmod == 0x02) + put_bits(&s->pb, 2, 0); /* surround not indicated */ + put_bits(&s->pb, 1, s->lfe); /* LFE */ + put_bits(&s->pb, 5, 31); /* dialog norm: -31 db */ + put_bits(&s->pb, 1, 0); /* no compression control word */ + put_bits(&s->pb, 1, 0); /* no lang code */ + put_bits(&s->pb, 1, 0); /* no audio production info */ + put_bits(&s->pb, 1, 0); /* no copyright */ + put_bits(&s->pb, 1, 1); /* original bitstream */ + put_bits(&s->pb, 1, 0); /* no time code 1 */ + put_bits(&s->pb, 1, 0); /* no time code 2 */ + put_bits(&s->pb, 1, 0); /* no addtional bit stream info */ +} + +/* symetric quantization on 'levels' levels */ +static inline int sym_quant(int c, int e, int levels) +{ + int v; + + if (c >= 0) { + v = (levels * (c << e)) >> 24; + v = (v + 1) >> 1; + v = (levels >> 1) + v; + } else { + v = (levels * ((-c) << e)) >> 24; + v = (v + 1) >> 1; + v = (levels >> 1) - v; + } + assert (v >= 0 && v < levels); + return v; +} + +/* asymetric quantization on 2^qbits levels */ +static inline int asym_quant(int c, int e, int qbits) +{ + int lshift, m, v; + + lshift = e + qbits - 24; + if (lshift >= 0) + v = c << lshift; + else + v = c >> (-lshift); + /* rounding */ + v = (v + 1) >> 1; + m = (1 << (qbits-1)); + if (v >= m) + v = m - 1; + assert(v >= -m); + return v & ((1 << qbits)-1); +} + +/* Output one audio block. 
There are NB_BLOCKS audio blocks in one AC3 + frame */ +static void output_audio_block(AC3EncodeContext *s, + uint8_t exp_strategy[AC3_MAX_CHANNELS], + uint8_t encoded_exp[AC3_MAX_CHANNELS][N/2], + uint8_t bap[AC3_MAX_CHANNELS][N/2], + int32_t mdct_coefs[AC3_MAX_CHANNELS][N/2], + int8_t global_exp[AC3_MAX_CHANNELS], + int block_num) +{ + int ch, nb_groups, group_size, i, baie, rbnd; + uint8_t *p; + uint16_t qmant[AC3_MAX_CHANNELS][N/2]; + int exp0, exp1; + int mant1_cnt, mant2_cnt, mant4_cnt; + uint16_t *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; + int delta0, delta1, delta2; + + for(ch=0;chnb_channels;ch++) + put_bits(&s->pb, 1, 0); /* 512 point MDCT */ + for(ch=0;chnb_channels;ch++) + put_bits(&s->pb, 1, 1); /* no dither */ + put_bits(&s->pb, 1, 0); /* no dynamic range */ + if (block_num == 0) { + /* for block 0, even if no coupling, we must say it. This is a + waste of bit :-) */ + put_bits(&s->pb, 1, 1); /* coupling strategy present */ + put_bits(&s->pb, 1, 0); /* no coupling strategy */ + } else { + put_bits(&s->pb, 1, 0); /* no new coupling strategy */ + } + + if (s->acmod == 2) + { + if(block_num==0) + { + /* first block must define rematrixing (rematstr) */ + put_bits(&s->pb, 1, 1); + + /* dummy rematrixing rematflg(1:4)=0 */ + for (rbnd=0;rbnd<4;rbnd++) + put_bits(&s->pb, 1, 0); + } + else + { + /* no matrixing (but should be used in the future) */ + put_bits(&s->pb, 1, 0); + } + } + +#if defined(DEBUG) + { + static int count = 0; + av_log(NULL, AV_LOG_DEBUG, "Block #%d (%d)\n", block_num, count++); + } +#endif + /* exponent strategy */ + for(ch=0;chnb_channels;ch++) { + put_bits(&s->pb, 2, exp_strategy[ch]); + } + + if (s->lfe) { + put_bits(&s->pb, 1, exp_strategy[s->lfe_channel]); + } + + for(ch=0;chnb_channels;ch++) { + if (exp_strategy[ch] != EXP_REUSE) + put_bits(&s->pb, 6, s->chbwcod[ch]); + } + + /* exponents */ + for (ch = 0; ch < s->nb_all_channels; ch++) { + switch(exp_strategy[ch]) { + case EXP_REUSE: + continue; + case EXP_D15: + group_size = 1; + 
break; + case EXP_D25: + group_size = 2; + break; + default: + case EXP_D45: + group_size = 4; + break; + } + nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); + p = encoded_exp[ch]; + + /* first exponent */ + exp1 = *p++; + put_bits(&s->pb, 4, exp1); + + /* next ones are delta encoded */ + for(i=0;ipb, 7, ((delta0 * 5 + delta1) * 5) + delta2); + } + + if (ch != s->lfe_channel) + put_bits(&s->pb, 2, 0); /* no gain range info */ + } + + /* bit allocation info */ + baie = (block_num == 0); + put_bits(&s->pb, 1, baie); + if (baie) { + put_bits(&s->pb, 2, s->sdecaycod); + put_bits(&s->pb, 2, s->fdecaycod); + put_bits(&s->pb, 2, s->sgaincod); + put_bits(&s->pb, 2, s->dbkneecod); + put_bits(&s->pb, 3, s->floorcod); + } + + /* snr offset */ + put_bits(&s->pb, 1, baie); /* always present with bai */ + if (baie) { + put_bits(&s->pb, 6, s->csnroffst); + for(ch=0;chnb_all_channels;ch++) { + put_bits(&s->pb, 4, s->fsnroffst[ch]); + put_bits(&s->pb, 3, s->fgaincod[ch]); + } + } + + put_bits(&s->pb, 1, 0); /* no delta bit allocation */ + put_bits(&s->pb, 1, 0); /* no data to skip */ + + /* mantissa encoding : we use two passes to handle the grouping. A + one pass method may be faster, but it would necessitate to + modify the output stream. 
*/ + + /* first pass: quantize */ + mant1_cnt = mant2_cnt = mant4_cnt = 0; + qmant1_ptr = qmant2_ptr = qmant4_ptr = NULL; + + for (ch = 0; ch < s->nb_all_channels; ch++) { + int b, c, e, v; + + for(i=0;inb_coefs[ch];i++) { + c = mdct_coefs[ch][i]; + e = encoded_exp[ch][i] - global_exp[ch]; + b = bap[ch][i]; + switch(b) { + case 0: + v = 0; + break; + case 1: + v = sym_quant(c, e, 3); + switch(mant1_cnt) { + case 0: + qmant1_ptr = &qmant[ch][i]; + v = 9 * v; + mant1_cnt = 1; + break; + case 1: + *qmant1_ptr += 3 * v; + mant1_cnt = 2; + v = 128; + break; + default: + *qmant1_ptr += v; + mant1_cnt = 0; + v = 128; + break; + } + break; + case 2: + v = sym_quant(c, e, 5); + switch(mant2_cnt) { + case 0: + qmant2_ptr = &qmant[ch][i]; + v = 25 * v; + mant2_cnt = 1; + break; + case 1: + *qmant2_ptr += 5 * v; + mant2_cnt = 2; + v = 128; + break; + default: + *qmant2_ptr += v; + mant2_cnt = 0; + v = 128; + break; + } + break; + case 3: + v = sym_quant(c, e, 7); + break; + case 4: + v = sym_quant(c, e, 11); + switch(mant4_cnt) { + case 0: + qmant4_ptr = &qmant[ch][i]; + v = 11 * v; + mant4_cnt = 1; + break; + default: + *qmant4_ptr += v; + mant4_cnt = 0; + v = 128; + break; + } + break; + case 5: + v = sym_quant(c, e, 15); + break; + case 14: + v = asym_quant(c, e, 14); + break; + case 15: + v = asym_quant(c, e, 16); + break; + default: + v = asym_quant(c, e, b - 1); + break; + } + qmant[ch][i] = v; + } + } + + /* second pass : output the values */ + for (ch = 0; ch < s->nb_all_channels; ch++) { + int b, q; + + for(i=0;inb_coefs[ch];i++) { + q = qmant[ch][i]; + b = bap[ch][i]; + switch(b) { + case 0: + break; + case 1: + if (q != 128) + put_bits(&s->pb, 5, q); + break; + case 2: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 3: + put_bits(&s->pb, 3, q); + break; + case 4: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 14: + put_bits(&s->pb, 14, q); + break; + case 15: + put_bits(&s->pb, 16, q); + break; + default: + put_bits(&s->pb, b - 1, q); + break; + 
} + } + } +} + +#define CRC16_POLY ((1 << 0) | (1 << 2) | (1 << 15) | (1 << 16)) + +static unsigned int mul_poly(unsigned int a, unsigned int b, unsigned int poly) +{ + unsigned int c; + + c = 0; + while (a) { + if (a & 1) + c ^= b; + a = a >> 1; + b = b << 1; + if (b & (1 << 16)) + b ^= poly; + } + return c; +} + +static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly) +{ + unsigned int r; + r = 1; + while (n) { + if (n & 1) + r = mul_poly(r, a, poly); + a = mul_poly(a, a, poly); + n >>= 1; + } + return r; +} + + +/* compute log2(max(abs(tab[]))) */ +static int log2_tab(int16_t *tab, int n) +{ + int i, v; + + v = 0; + for(i=0;i 0) { + for(i=0;i>= lshift; + } + } +} + +/* fill the end of the frame and compute the two crcs */ +static int output_frame_end(AC3EncodeContext *s) +{ + int frame_size, frame_size_58, n, crc1, crc2, crc_inv; + uint8_t *frame; + + frame_size = s->frame_size; /* frame size in words */ + /* align to 8 bits */ + flush_put_bits(&s->pb); + /* add zero bytes to reach the frame size */ + frame = s->pb.buf; + n = 2 * s->frame_size - (pbBufPtr(&s->pb) - frame) - 2; + assert(n >= 0); + if(n>0) + memset(pbBufPtr(&s->pb), 0, n); + + /* Now we must compute both crcs : this is not so easy for crc1 + because it is at the beginning of the data... 
*/ + frame_size_58 = (frame_size >> 1) + (frame_size >> 3); + crc1 = bswap_16(av_crc(av_crc8005, 0, frame + 4, 2 * frame_size_58 - 4)); + /* XXX: could precompute crc_inv */ + crc_inv = pow_poly((CRC16_POLY >> 1), (16 * frame_size_58) - 16, CRC16_POLY); + crc1 = mul_poly(crc_inv, crc1, CRC16_POLY); + frame[2] = crc1 >> 8; + frame[3] = crc1; + + crc2 = bswap_16(av_crc(av_crc8005, 0, frame + 2 * frame_size_58, (frame_size - frame_size_58) * 2 - 2)); + frame[2*frame_size - 2] = crc2 >> 8; + frame[2*frame_size - 1] = crc2; + + // printf("n=%d frame_size=%d\n", n, frame_size); + return frame_size * 2; +} + +static int AC3_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + AC3EncodeContext *s = avctx->priv_data; + int16_t *samples = data; + int i, j, k, v, ch; + int16_t input_samples[N]; + int32_t mdct_coef[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + uint8_t exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + uint8_t exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS]; + uint8_t encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + uint8_t bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + int8_t exp_samples[NB_BLOCKS][AC3_MAX_CHANNELS]; + int frame_bits; + + frame_bits = 0; + for(ch=0;chnb_all_channels;ch++) { + /* fixed mdct to the six sub blocks & exponent computation */ + for(i=0;ilast_samples[ch], N/2 * sizeof(int16_t)); + sinc = s->nb_all_channels; + sptr = samples + (sinc * (N/2) * i) + ch; + for(j=0;jlast_samples[ch][j] = v; + sptr += sinc; + } + + /* apply the MDCT window */ + for(j=0;j> 15; + input_samples[N-j-1] = MUL16(input_samples[N-j-1], + ac3_window[j]) >> 15; + } + + /* Normalize the samples to use the maximum available + precision */ + v = 14 - log2_tab(input_samples, N); + if (v < 0) + v = 0; + exp_samples[i][ch] = v - 9; + lshift_tab(input_samples, N, v); + + /* do the MDCT */ + mdct512(mdct_coef[i][ch], input_samples); + + /* compute "exponents". 
We take into account the + normalization there */ + for(j=0;j= 24) { + e = 24; + mdct_coef[i][ch][j] = 0; + } + } + exp[i][ch][j] = e; + } + } + + compute_exp_strategy(exp_strategy, exp, ch, ch == s->lfe_channel); + + /* compute the exponents as the decoder will see them. The + EXP_REUSE case must be handled carefully : we select the + min of the exponents */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) { + exponent_min(exp[i][ch], exp[j][ch], s->nb_coefs[ch]); + j++; + } + frame_bits += encode_exp(encoded_exp[i][ch], + exp[i][ch], s->nb_coefs[ch], + exp_strategy[i][ch]); + /* copy encoded exponents for reuse case */ + for(k=i+1;knb_coefs[ch] * sizeof(uint8_t)); + } + i = j; + } + } + + /* adjust for fractional frame sizes */ + while(s->bits_written >= s->bit_rate*1000 && s->samples_written >= s->sample_rate) { + s->bits_written -= s->bit_rate*1000; + s->samples_written -= s->sample_rate; + } + s->frame_size = s->frame_size_min + (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate*1000); + s->bits_written += s->frame_size * 16; + s->samples_written += AC3_FRAME_SIZE; + + compute_bit_allocation(s, bap, encoded_exp, exp_strategy, frame_bits); + /* everything is known... 
let's output the frame */ + output_frame_header(s, frame); + + for(i=0;icoded_frame); + return 0; +} + +#if 0 +/*************************************************************************/ +/* TEST */ + +#define FN (N/4) + +void fft_test(void) +{ + IComplex in[FN], in1[FN]; + int k, n, i; + float sum_re, sum_im, a; + + /* FFT test */ + + for(i=0;i emax) + emax = e; + err += e * e; + } + printf("err2=%f emax=%f\n", err / (N/2), emax); +} + +void test_ac3(void) +{ + AC3EncodeContext ctx; + unsigned char frame[AC3_MAX_CODED_FRAME_SIZE]; + short samples[AC3_FRAME_SIZE]; + int ret, i; + + AC3_encode_init(&ctx, 44100, 64000, 1); + + fft_test(); + mdct_test(); + + for(i=0;i 32767) \ + value = 32767; \ +else if (value < -32768) \ + value = -32768; \ + +/* step_table[] and index_table[] are from the ADPCM reference source */ +/* This is the index table: */ +static const int index_table[16] = { + -1, -1, -1, -1, 2, 4, 6, 8, + -1, -1, -1, -1, 2, 4, 6, 8, +}; + +/** + * This is the step table. Note that many programs use slight deviations from + * this table, but such deviations are negligible: + */ +static const int step_table[89] = { + 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, + 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, + 50, 55, 60, 66, 73, 80, 88, 97, 107, 118, + 130, 143, 157, 173, 190, 209, 230, 253, 279, 307, + 337, 371, 408, 449, 494, 544, 598, 658, 724, 796, + 876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066, + 2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358, + 5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899, + 15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 +}; + +/* These are for MS-ADPCM */ +/* AdaptationTable[], AdaptCoeff1[], and AdaptCoeff2[] are from libsndfile */ +static const int AdaptationTable[] = { + 230, 230, 230, 230, 307, 409, 512, 614, + 768, 614, 512, 409, 307, 230, 230, 230 +}; + +static const int AdaptCoeff1[] = { + 256, 512, 0, 192, 240, 460, 392 +}; + +static const int AdaptCoeff2[] = { + 0, -256, 0, 64, 0, 
-208, -232 +}; + +/* These are for CD-ROM XA ADPCM */ +static const int xa_adpcm_table[5][2] = { + { 0, 0 }, + { 60, 0 }, + { 115, -52 }, + { 98, -55 }, + { 122, -60 } +}; + +static const int ea_adpcm_table[] = { + 0, 240, 460, 392, 0, 0, -208, -220, 0, 1, + 3, 4, 7, 8, 10, 11, 0, -1, -3, -4 +}; + +static const int ct_adpcm_table[8] = { + 0x00E6, 0x00E6, 0x00E6, 0x00E6, + 0x0133, 0x0199, 0x0200, 0x0266 +}; + +// padded to zero where table size is less then 16 +static const int swf_index_tables[4][16] = { + /*2*/ { -1, 2 }, + /*3*/ { -1, -1, 2, 4 }, + /*4*/ { -1, -1, -1, -1, 2, 4, 6, 8 }, + /*5*/ { -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16 } +}; + +static const int yamaha_indexscale[] = { + 230, 230, 230, 230, 307, 409, 512, 614, + 230, 230, 230, 230, 307, 409, 512, 614 +}; + +static const int yamaha_difflookup[] = { + 1, 3, 5, 7, 9, 11, 13, 15, + -1, -3, -5, -7, -9, -11, -13, -15 +}; + +/* end of tables */ + +typedef struct ADPCMChannelStatus { + int predictor; + short int step_index; + int step; + /* for encoding */ + int prev_sample; + + /* MS version */ + short sample1; + short sample2; + int coeff1; + int coeff2; + int idelta; +} ADPCMChannelStatus; + +typedef struct ADPCMContext { + int channel; /* for stereo MOVs, decode left, then decode right, then tell it's decoded */ + ADPCMChannelStatus status[2]; + short sample_buffer[32]; /* hold left samples while waiting for right samples */ + + /* SWF only */ + int nb_bits; + int nb_samples; +} ADPCMContext; + +/* XXX: implement encoding */ + +#ifdef CONFIG_ENCODERS +static int adpcm_encode_init(AVCodecContext *avctx) +{ + if (avctx->channels > 2) + return -1; /* only stereo or mono =) */ + switch(avctx->codec->id) { + case CODEC_ID_ADPCM_IMA_QT: + av_log(avctx, AV_LOG_ERROR, "ADPCM: codec adpcm_ima_qt unsupported for encoding !\n"); + avctx->frame_size = 64; /* XXX: can multiple of avctx->channels * 64 (left and right blocks are interleaved) */ + return -1; + break; + case CODEC_ID_ADPCM_IMA_WAV: + 
avctx->frame_size = (BLKSIZE - 4 * avctx->channels) * 8 / (4 * avctx->channels) + 1; /* each 16 bits sample gives one nibble */ + /* and we have 4 bytes per channel overhead */ + avctx->block_align = BLKSIZE; + /* seems frame_size isn't taken into account... have to buffer the samples :-( */ + break; + case CODEC_ID_ADPCM_MS: + avctx->frame_size = (BLKSIZE - 7 * avctx->channels) * 2 / avctx->channels + 2; /* each 16 bits sample gives one nibble */ + /* and we have 7 bytes per channel overhead */ + avctx->block_align = BLKSIZE; + break; + case CODEC_ID_ADPCM_YAMAHA: + avctx->frame_size = BLKSIZE * avctx->channels; + avctx->block_align = BLKSIZE; + break; + default: + return -1; + break; + } + + avctx->coded_frame= avcodec_alloc_frame(); + avctx->coded_frame->key_frame= 1; + + return 0; +} + +static int adpcm_encode_close(AVCodecContext *avctx) +{ + av_freep(&avctx->coded_frame); + + return 0; +} + + +static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, short sample) +{ + int step_index; + unsigned char nibble; + + int sign = 0; /* sign bit of the nibble (MSB) */ + int delta, predicted_delta; + + delta = sample - c->prev_sample; + + if (delta < 0) { + sign = 1; + delta = -delta; + } + + step_index = c->step_index; + + /* nibble = 4 * delta / step_table[step_index]; */ + nibble = (delta << 2) / step_table[step_index]; + + if (nibble > 7) + nibble = 7; + + step_index += index_table[nibble]; + if (step_index < 0) + step_index = 0; + if (step_index > 88) + step_index = 88; + + /* what the decoder will find */ + predicted_delta = ((step_table[step_index] * nibble) / 4) + (step_table[step_index] / 8); + + if (sign) + c->prev_sample -= predicted_delta; + else + c->prev_sample += predicted_delta; + + CLAMP_TO_SHORT(c->prev_sample); + + + nibble += sign << 3; /* sign * 8 */ + + /* save back */ + c->step_index = step_index; + + return nibble; +} + +static inline unsigned char adpcm_ms_compress_sample(ADPCMChannelStatus *c, short sample) +{ + int 
predictor, nibble, bias; + + predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256; + + nibble= sample - predictor; + if(nibble>=0) bias= c->idelta/2; + else bias=-c->idelta/2; + + nibble= (nibble + bias) / c->idelta; + nibble= clip(nibble, -8, 7)&0x0F; + + predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta; + CLAMP_TO_SHORT(predictor); + + c->sample2 = c->sample1; + c->sample1 = predictor; + + c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8; + if (c->idelta < 16) c->idelta = 16; + + return nibble; +} + +static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c, short sample) +{ + int i1 = 0, j1; + + if(!c->step) { + c->predictor = 0; + c->step = 127; + } + j1 = sample - c->predictor; + + j1 = (j1 * 8) / c->step; + i1 = abs(j1) / 2; + if (i1 > 7) + i1 = 7; + if (j1 < 0) + i1 += 8; + + c->predictor = c->predictor + ((c->step * yamaha_difflookup[i1]) / 8); + CLAMP_TO_SHORT(c->predictor); + c->step = (c->step * yamaha_indexscale[i1]) >> 8; + c->step = clip(c->step, 127, 24567); + + return i1; +} + +static int adpcm_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + int n, i, st; + short *samples; + unsigned char *dst; + ADPCMContext *c = avctx->priv_data; + + dst = frame; + samples = (short *)data; + st= avctx->channels == 2; +/* n = (BLKSIZE - 4 * avctx->channels) / (2 * 8 * avctx->channels); */ + + switch(avctx->codec->id) { + case CODEC_ID_ADPCM_IMA_QT: /* XXX: can't test until we get .mov writer */ + break; + case CODEC_ID_ADPCM_IMA_WAV: + n = avctx->frame_size / 8; + c->status[0].prev_sample = (signed short)samples[0]; /* XXX */ +/* c->status[0].step_index = 0; *//* XXX: not sure how to init the state machine */ + *dst++ = (c->status[0].prev_sample) & 0xFF; /* little endian */ + *dst++ = (c->status[0].prev_sample >> 8) & 0xFF; + *dst++ = (unsigned char)c->status[0].step_index; + *dst++ = 0; /* unknown */ + samples++; + if (avctx->channels == 
2) { + c->status[1].prev_sample = (signed short)samples[1]; +/* c->status[1].step_index = 0; */ + *dst++ = (c->status[1].prev_sample) & 0xFF; + *dst++ = (c->status[1].prev_sample >> 8) & 0xFF; + *dst++ = (unsigned char)c->status[1].step_index; + *dst++ = 0; + samples++; + } + + /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */ + for (; n>0; n--) { + *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F; + *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels]) << 4) & 0xF0; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 2]) & 0x0F; + *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 3]) << 4) & 0xF0; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 4]) & 0x0F; + *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 5]) << 4) & 0xF0; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 6]) & 0x0F; + *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels * 7]) << 4) & 0xF0; + dst++; + /* right channel */ + if (avctx->channels == 2) { + *dst = adpcm_ima_compress_sample(&c->status[1], samples[1]); + *dst |= adpcm_ima_compress_sample(&c->status[1], samples[3]) << 4; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[1], samples[5]); + *dst |= adpcm_ima_compress_sample(&c->status[1], samples[7]) << 4; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[1], samples[9]); + *dst |= adpcm_ima_compress_sample(&c->status[1], samples[11]) << 4; + dst++; + *dst = adpcm_ima_compress_sample(&c->status[1], samples[13]); + *dst |= adpcm_ima_compress_sample(&c->status[1], samples[15]) << 4; + dst++; + } + samples += 8 * avctx->channels; + } + break; + case CODEC_ID_ADPCM_MS: + for(i=0; ichannels; i++){ + int predictor=0; + + *dst++ = predictor; + c->status[i].coeff1 = AdaptCoeff1[predictor]; + c->status[i].coeff2 = AdaptCoeff2[predictor]; + } + 
for(i=0; ichannels; i++){ + if (c->status[i].idelta < 16) + c->status[i].idelta = 16; + + *dst++ = c->status[i].idelta & 0xFF; + *dst++ = c->status[i].idelta >> 8; + } + for(i=0; ichannels; i++){ + c->status[i].sample1= *samples++; + + *dst++ = c->status[i].sample1 & 0xFF; + *dst++ = c->status[i].sample1 >> 8; + } + for(i=0; ichannels; i++){ + c->status[i].sample2= *samples++; + + *dst++ = c->status[i].sample2 & 0xFF; + *dst++ = c->status[i].sample2 >> 8; + } + + for(i=7*avctx->channels; iblock_align; i++) { + int nibble; + nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4; + nibble|= adpcm_ms_compress_sample(&c->status[st], *samples++); + *dst++ = nibble; + } + break; + case CODEC_ID_ADPCM_YAMAHA: + n = avctx->frame_size / 2; + for (; n>0; n--) { + for(i = 0; i < avctx->channels; i++) { + int nibble; + nibble = adpcm_yamaha_compress_sample(&c->status[i], samples[i]); + nibble |= adpcm_yamaha_compress_sample(&c->status[i], samples[i+avctx->channels]) << 4; + *dst++ = nibble; + } + samples += 2 * avctx->channels; + } + break; + default: + return -1; + } + return dst - frame; +} +#endif //CONFIG_ENCODERS + +static int adpcm_decode_init(AVCodecContext * avctx) +{ + ADPCMContext *c = avctx->priv_data; + + c->channel = 0; + c->status[0].predictor = c->status[1].predictor = 0; + c->status[0].step_index = c->status[1].step_index = 0; + c->status[0].step = c->status[1].step = 0; + + switch(avctx->codec->id) { + case CODEC_ID_ADPCM_CT: + c->status[0].step = c->status[1].step = 511; + break; + default: + break; + } + return 0; +} + +static inline short adpcm_ima_expand_nibble(ADPCMChannelStatus *c, char nibble, int shift) +{ + int step_index; + int predictor; + int sign, delta, diff, step; + + step = step_table[c->step_index]; + step_index = c->step_index + index_table[(unsigned)nibble]; + if (step_index < 0) step_index = 0; + else if (step_index > 88) step_index = 88; + + sign = nibble & 8; + delta = nibble & 7; + /* perform direct multiplication instead of 
series of jumps proposed by + * the reference ADPCM implementation since modern CPUs can do the mults + * quickly enough */ + diff = ((2 * delta + 1) * step) >> shift; + predictor = c->predictor; + if (sign) predictor -= diff; + else predictor += diff; + + CLAMP_TO_SHORT(predictor); + c->predictor = predictor; + c->step_index = step_index; + + return (short)predictor; +} + +static inline short adpcm_ms_expand_nibble(ADPCMChannelStatus *c, char nibble) +{ + int predictor; + + predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256; + predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta; + CLAMP_TO_SHORT(predictor); + + c->sample2 = c->sample1; + c->sample1 = predictor; + c->idelta = (AdaptationTable[(int)nibble] * c->idelta) >> 8; + if (c->idelta < 16) c->idelta = 16; + + return (short)predictor; +} + +static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble) +{ + int predictor; + int sign, delta, diff; + int new_step; + + sign = nibble & 8; + delta = nibble & 7; + /* perform direct multiplication instead of series of jumps proposed by + * the reference ADPCM implementation since modern CPUs can do the mults + * quickly enough */ + diff = ((2 * delta + 1) * c->step) >> 3; + predictor = c->predictor; + /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */ + if(sign) + predictor = ((predictor * 254) >> 8) - diff; + else + predictor = ((predictor * 254) >> 8) + diff; + /* calculate new step and clamp it to range 511..32767 */ + new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8; + c->step = new_step; + if(c->step < 511) + c->step = 511; + if(c->step > 32767) + c->step = 32767; + + CLAMP_TO_SHORT(predictor); + c->predictor = predictor; + return (short)predictor; +} + +static inline short adpcm_sbpro_expand_nibble(ADPCMChannelStatus *c, char nibble, int size, int shift) +{ + int sign, delta, diff; + + sign = nibble & (1<<(size-1)); + delta = nibble & 
((1<<(size-1))-1); + diff = delta << (7 + c->step + shift); + + if (sign) + c->predictor -= diff; + else + c->predictor += diff; + + /* clamp result */ + if (c->predictor > 16256) + c->predictor = 16256; + else if (c->predictor < -16384) + c->predictor = -16384; + + /* calculate new step */ + if (delta >= (2*size - 3) && c->step < 3) + c->step++; + else if (delta == 0 && c->step > 0) + c->step--; + + return (short) c->predictor; +} + +static inline short adpcm_yamaha_expand_nibble(ADPCMChannelStatus *c, unsigned char nibble) +{ + if(!c->step) { + c->predictor = 0; + c->step = 127; + } + + c->predictor += (c->step * yamaha_difflookup[nibble]) / 8; + CLAMP_TO_SHORT(c->predictor); + c->step = (c->step * yamaha_indexscale[nibble]) >> 8; + c->step = clip(c->step, 127, 24567); + return c->predictor; +} + +static void xa_decode(short *out, const unsigned char *in, + ADPCMChannelStatus *left, ADPCMChannelStatus *right, int inc) +{ + int i, j; + int shift,filter,f0,f1; + int s_1,s_2; + int d,s,t; + + for(i=0;i<4;i++) { + + shift = 12 - (in[4+i*2] & 15); + filter = in[4+i*2] >> 4; + f0 = xa_adpcm_table[filter][0]; + f1 = xa_adpcm_table[filter][1]; + + s_1 = left->sample1; + s_2 = left->sample2; + + for(j=0;j<28;j++) { + d = in[16+i+j*4]; + + t = (signed char)(d<<4)>>4; + s = ( t<>6); + CLAMP_TO_SHORT(s); + *out = s; + out += inc; + s_2 = s_1; + s_1 = s; + } + + if (inc==2) { /* stereo */ + left->sample1 = s_1; + left->sample2 = s_2; + s_1 = right->sample1; + s_2 = right->sample2; + out = out + 1 - 28*2; + } + + shift = 12 - (in[5+i*2] & 15); + filter = in[5+i*2] >> 4; + + f0 = xa_adpcm_table[filter][0]; + f1 = xa_adpcm_table[filter][1]; + + for(j=0;j<28;j++) { + d = in[16+i+j*4]; + + t = (signed char)d >> 4; + s = ( t<>6); + CLAMP_TO_SHORT(s); + *out = s; + out += inc; + s_2 = s_1; + s_1 = s; + } + + if (inc==2) { /* stereo */ + right->sample1 = s_1; + right->sample2 = s_2; + out -= 1; + } else { + left->sample1 = s_1; + left->sample2 = s_2; + } + } +} + + +/* DK3 ADPCM 
support macro */ +#define DK3_GET_NEXT_NIBBLE() \ + if (decode_top_nibble_next) \ + { \ + nibble = (last_byte >> 4) & 0x0F; \ + decode_top_nibble_next = 0; \ + } \ + else \ + { \ + last_byte = *src++; \ + if (src >= buf + buf_size) break; \ + nibble = last_byte & 0x0F; \ + decode_top_nibble_next = 1; \ + } + +static int adpcm_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + ADPCMContext *c = avctx->priv_data; + ADPCMChannelStatus *cs; + int n, m, channel, i; + int block_predictor[2]; + short *samples; + uint8_t *src; + int st; /* stereo */ + + /* DK3 ADPCM accounting variables */ + unsigned char last_byte = 0; + unsigned char nibble; + int decode_top_nibble_next = 0; + int diff_channel; + + /* EA ADPCM state variables */ + uint32_t samples_in_chunk; + int32_t previous_left_sample, previous_right_sample; + int32_t current_left_sample, current_right_sample; + int32_t next_left_sample, next_right_sample; + int32_t coeff1l, coeff2l, coeff1r, coeff2r; + uint8_t shift_left, shift_right; + int count1, count2; + + if (!buf_size) + return 0; + + samples = data; + src = buf; + + st = avctx->channels == 2 ? 
1 : 0; + + switch(avctx->codec->id) { + case CODEC_ID_ADPCM_IMA_QT: + n = (buf_size - 2);/* >> 2*avctx->channels;*/ + channel = c->channel; + cs = &(c->status[channel]); + /* (pppppp) (piiiiiii) */ + + /* Bits 15-7 are the _top_ 9 bits of the 16-bit initial predictor value */ + cs->predictor = (*src++) << 8; + cs->predictor |= (*src & 0x80); + cs->predictor &= 0xFF80; + + /* sign extension */ + if(cs->predictor & 0x8000) + cs->predictor -= 0x10000; + + CLAMP_TO_SHORT(cs->predictor); + + cs->step_index = (*src++) & 0x7F; + + if (cs->step_index > 88){ + av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index); + cs->step_index = 88; + } + + cs->step = step_table[cs->step_index]; + + if (st && channel) + samples++; + + for(m=32; n>0 && m>0; n--, m--) { /* in QuickTime, IMA is encoded by chuncks of 34 bytes (=64 samples) */ + *samples = adpcm_ima_expand_nibble(cs, src[0] & 0x0F, 3); + samples += avctx->channels; + *samples = adpcm_ima_expand_nibble(cs, (src[0] >> 4) & 0x0F, 3); + samples += avctx->channels; + src ++; + } + + if(st) { /* handle stereo interlacing */ + c->channel = (channel + 1) % 2; /* we get one packet for left, then one for right data */ + if(channel == 1) { /* wait for the other packet before outputing anything */ + return src - buf; + } + } + break; + case CODEC_ID_ADPCM_IMA_WAV: + if (avctx->block_align != 0 && buf_size > avctx->block_align) + buf_size = avctx->block_align; + +// samples_per_block= (block_align-4*chanels)*8 / (bits_per_sample * chanels) + 1; + + for(i=0; ichannels; i++){ + cs = &(c->status[i]); + cs->predictor = (int16_t)(src[0] + (src[1]<<8)); + src+=2; + + // XXX: is this correct ??: *samples++ = cs->predictor; + + cs->step_index = *src++; + if (cs->step_index > 88){ + av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index); + cs->step_index = 88; + } + if (*src++) av_log(avctx, AV_LOG_ERROR, "unused byte should be null but is %d!!\n", src[-1]); /* unused */ + } + + while(src < buf + buf_size){ + 
for(m=0; m<4; m++){ + for(i=0; i<=st; i++) + *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] & 0x0F, 3); + for(i=0; i<=st; i++) + *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] >> 4 , 3); + src++; + } + src += 4*st; + } + break; + case CODEC_ID_ADPCM_4XM: + cs = &(c->status[0]); + c->status[0].predictor= (int16_t)(src[0] + (src[1]<<8)); src+=2; + if(st){ + c->status[1].predictor= (int16_t)(src[0] + (src[1]<<8)); src+=2; + } + c->status[0].step_index= (int16_t)(src[0] + (src[1]<<8)); src+=2; + if(st){ + c->status[1].step_index= (int16_t)(src[0] + (src[1]<<8)); src+=2; + } + if (cs->step_index < 0) cs->step_index = 0; + if (cs->step_index > 88) cs->step_index = 88; + + m= (buf_size - (src - buf))>>st; + for(i=0; istatus[0], src[i] & 0x0F, 4); + if (st) + *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4); + *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4); + if (st) + *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4); + } + + src += m<block_align != 0 && buf_size > avctx->block_align) + buf_size = avctx->block_align; + n = buf_size - 7 * avctx->channels; + if (n < 0) + return -1; + block_predictor[0] = clip(*src++, 0, 7); + block_predictor[1] = 0; + if (st) + block_predictor[1] = clip(*src++, 0, 7); + c->status[0].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + src+=2; + if (st){ + c->status[1].idelta = (int16_t)((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + src+=2; + } + c->status[0].coeff1 = AdaptCoeff1[block_predictor[0]]; + c->status[0].coeff2 = AdaptCoeff2[block_predictor[0]]; + c->status[1].coeff1 = AdaptCoeff1[block_predictor[1]]; + c->status[1].coeff2 = AdaptCoeff2[block_predictor[1]]; + + c->status[0].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + src+=2; + if (st) c->status[1].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + if (st) src+=2; + c->status[0].sample2 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + src+=2; + if (st) 
c->status[1].sample2 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00)); + if (st) src+=2; + + *samples++ = c->status[0].sample1; + if (st) *samples++ = c->status[1].sample1; + *samples++ = c->status[0].sample2; + if (st) *samples++ = c->status[1].sample2; + for(;n>0;n--) { + *samples++ = adpcm_ms_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F); + *samples++ = adpcm_ms_expand_nibble(&c->status[st], src[0] & 0x0F); + src ++; + } + break; + case CODEC_ID_ADPCM_IMA_DK4: + if (avctx->block_align != 0 && buf_size > avctx->block_align) + buf_size = avctx->block_align; + + c->status[0].predictor = (int16_t)(src[0] | (src[1] << 8)); + c->status[0].step_index = src[2]; + src += 4; + *samples++ = c->status[0].predictor; + if (st) { + c->status[1].predictor = (int16_t)(src[0] | (src[1] << 8)); + c->status[1].step_index = src[2]; + src += 4; + *samples++ = c->status[1].predictor; + } + while (src < buf + buf_size) { + + /* take care of the top nibble (always left or mono channel) */ + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F, 3); + + /* take care of the bottom nibble, which is right sample for + * stereo, or another mono sample */ + if (st) + *samples++ = adpcm_ima_expand_nibble(&c->status[1], + src[0] & 0x0F, 3); + else + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + src[0] & 0x0F, 3); + + src++; + } + break; + case CODEC_ID_ADPCM_IMA_DK3: + if (avctx->block_align != 0 && buf_size > avctx->block_align) + buf_size = avctx->block_align; + + c->status[0].predictor = (int16_t)(src[10] | (src[11] << 8)); + c->status[1].predictor = (int16_t)(src[12] | (src[13] << 8)); + c->status[0].step_index = src[14]; + c->status[1].step_index = src[15]; + /* sign extend the predictors */ + src += 16; + diff_channel = c->status[1].predictor; + + /* the DK3_GET_NEXT_NIBBLE macro issues the break statement when + * the buffer is consumed */ + while (1) { + + /* for this algorithm, c->status[0] is the sum channel and + * c->status[1] is the diff channel */ + + 
/* process the first predictor of the sum channel */ + DK3_GET_NEXT_NIBBLE(); + adpcm_ima_expand_nibble(&c->status[0], nibble, 3); + + /* process the diff channel predictor */ + DK3_GET_NEXT_NIBBLE(); + adpcm_ima_expand_nibble(&c->status[1], nibble, 3); + + /* process the first pair of stereo PCM samples */ + diff_channel = (diff_channel + c->status[1].predictor) / 2; + *samples++ = c->status[0].predictor + c->status[1].predictor; + *samples++ = c->status[0].predictor - c->status[1].predictor; + + /* process the second predictor of the sum channel */ + DK3_GET_NEXT_NIBBLE(); + adpcm_ima_expand_nibble(&c->status[0], nibble, 3); + + /* process the second pair of stereo PCM samples */ + diff_channel = (diff_channel + c->status[1].predictor) / 2; + *samples++ = c->status[0].predictor + c->status[1].predictor; + *samples++ = c->status[0].predictor - c->status[1].predictor; + } + break; + case CODEC_ID_ADPCM_IMA_WS: + /* no per-block initialization; just start decoding the data */ + while (src < buf + buf_size) { + + if (st) { + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F, 3); + *samples++ = adpcm_ima_expand_nibble(&c->status[1], + src[0] & 0x0F, 3); + } else { + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F, 3); + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + src[0] & 0x0F, 3); + } + + src++; + } + break; + case CODEC_ID_ADPCM_XA: + c->status[0].sample1 = c->status[0].sample2 = + c->status[1].sample1 = c->status[1].sample2 = 0; + while (buf_size >= 128) { + xa_decode(samples, src, &c->status[0], &c->status[1], + avctx->channels); + src += 128; + samples += 28 * 8; + buf_size -= 128; + } + break; + case CODEC_ID_ADPCM_EA: + samples_in_chunk = LE_32(src); + if (samples_in_chunk >= ((buf_size - 12) * 2)) { + src += buf_size; + break; + } + src += 4; + current_left_sample = (int16_t)LE_16(src); + src += 2; + previous_left_sample = (int16_t)LE_16(src); + src += 2; + current_right_sample = 
(int16_t)LE_16(src); + src += 2; + previous_right_sample = (int16_t)LE_16(src); + src += 2; + + for (count1 = 0; count1 < samples_in_chunk/28;count1++) { + coeff1l = ea_adpcm_table[(*src >> 4) & 0x0F]; + coeff2l = ea_adpcm_table[((*src >> 4) & 0x0F) + 4]; + coeff1r = ea_adpcm_table[*src & 0x0F]; + coeff2r = ea_adpcm_table[(*src & 0x0F) + 4]; + src++; + + shift_left = ((*src >> 4) & 0x0F) + 8; + shift_right = (*src & 0x0F) + 8; + src++; + + for (count2 = 0; count2 < 28; count2++) { + next_left_sample = (((*src & 0xF0) << 24) >> shift_left); + next_right_sample = (((*src & 0x0F) << 28) >> shift_right); + src++; + + next_left_sample = (next_left_sample + + (current_left_sample * coeff1l) + + (previous_left_sample * coeff2l) + 0x80) >> 8; + next_right_sample = (next_right_sample + + (current_right_sample * coeff1r) + + (previous_right_sample * coeff2r) + 0x80) >> 8; + CLAMP_TO_SHORT(next_left_sample); + CLAMP_TO_SHORT(next_right_sample); + + previous_left_sample = current_left_sample; + current_left_sample = next_left_sample; + previous_right_sample = current_right_sample; + current_right_sample = next_right_sample; + *samples++ = (unsigned short)current_left_sample; + *samples++ = (unsigned short)current_right_sample; + } + } + break; + case CODEC_ID_ADPCM_IMA_SMJPEG: + c->status[0].predictor = *src; + src += 2; + c->status[0].step_index = *src++; + src++; /* skip another byte before getting to the meat */ + while (src < buf + buf_size) { + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + *src & 0x0F, 3); + *samples++ = adpcm_ima_expand_nibble(&c->status[0], + (*src >> 4) & 0x0F, 3); + src++; + } + break; + case CODEC_ID_ADPCM_CT: + while (src < buf + buf_size) { + if (st) { + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F); + *samples++ = adpcm_ct_expand_nibble(&c->status[1], + src[0] & 0x0F); + } else { + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F); + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + 
src[0] & 0x0F); + } + src++; + } + break; + case CODEC_ID_ADPCM_SBPRO_4: + case CODEC_ID_ADPCM_SBPRO_3: + case CODEC_ID_ADPCM_SBPRO_2: + if (!c->status[0].step_index) { + /* the first byte is a raw sample */ + *samples++ = 128 * (*src++ - 0x80); + if (st) + *samples++ = 128 * (*src++ - 0x80); + c->status[0].step_index = 1; + } + if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_4) { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F, 4, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + src[0] & 0x0F, 4, 0); + src++; + } + } else if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_3) { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 5) & 0x07, 3, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 2) & 0x07, 3, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + src[0] & 0x03, 2, 0); + src++; + } + } else { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 6) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + (src[0] >> 4) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 2) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + src[0] & 0x03, 2, 2); + src++; + } + } + break; + case CODEC_ID_ADPCM_SWF: + { + GetBitContext gb; + const int *table; + int k0, signmask; + int size = buf_size*8; + + init_get_bits(&gb, buf, size); + + // first frame, read bits & inital values + if (!c->nb_bits) + { + c->nb_bits = get_bits(&gb, 2)+2; +// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); + } + + table = swf_index_tables[c->nb_bits-2]; + k0 = 1 << (c->nb_bits-2); + signmask = 1 << (c->nb_bits-1); + + while (get_bits_count(&gb) <= size) + { + int i; + + c->nb_samples++; + // wrap around at every 4096 samples... 
+ if ((c->nb_samples & 0xfff) == 1) + { + for (i = 0; i <= st; i++) + { + *samples++ = c->status[i].predictor = get_sbits(&gb, 16); + c->status[i].step_index = get_bits(&gb, 6); + } + } + + // similar to IMA adpcm + for (i = 0; i <= st; i++) + { + int delta = get_bits(&gb, c->nb_bits); + int step = step_table[c->status[i].step_index]; + long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 + int k = k0; + + do { + if (delta & k) + vpdiff += step; + step >>= 1; + k >>= 1; + } while(k); + vpdiff += step; + + if (delta & signmask) + c->status[i].predictor -= vpdiff; + else + c->status[i].predictor += vpdiff; + + c->status[i].step_index += table[delta & (~signmask)]; + + c->status[i].step_index = clip(c->status[i].step_index, 0, 88); + c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); + + *samples++ = c->status[i].predictor; + } + } + +// src += get_bits_count(&gb)*8; + src += size; + + break; + } + case CODEC_ID_ADPCM_YAMAHA: + while (src < buf + buf_size) { + if (st) { + *samples++ = adpcm_yamaha_expand_nibble(&c->status[0], + src[0] & 0x0F); + *samples++ = adpcm_yamaha_expand_nibble(&c->status[1], + (src[0] >> 4) & 0x0F); + } else { + *samples++ = adpcm_yamaha_expand_nibble(&c->status[0], + src[0] & 0x0F); + *samples++ = adpcm_yamaha_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F); + } + src++; + } + break; + default: + return -1; + } + *data_size = (uint8_t *)samples - (uint8_t *)data; + return src - buf; +} + + + +#ifdef CONFIG_ENCODERS +#define ADPCM_ENCODER(id,name) \ +AVCodec name ## _encoder = { \ + #name, \ + CODEC_TYPE_AUDIO, \ + id, \ + sizeof(ADPCMContext), \ + adpcm_encode_init, \ + adpcm_encode_frame, \ + adpcm_encode_close, \ + NULL, \ +}; +#else +#define ADPCM_ENCODER(id,name) +#endif + +#ifdef CONFIG_DECODERS +#define ADPCM_DECODER(id,name) \ +AVCodec name ## _decoder = { \ + #name, \ + CODEC_TYPE_AUDIO, \ + id, \ + sizeof(ADPCMContext), \ + adpcm_decode_init, \ + NULL, \ + NULL, \ + adpcm_decode_frame, \ +}; +#else +#define 
ADPCM_DECODER(id,name) +#endif + +#define ADPCM_CODEC(id, name) \ +ADPCM_ENCODER(id,name) ADPCM_DECODER(id,name) + +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_QT, adpcm_ima_qt); +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_WAV, adpcm_ima_wav); +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_DK3, adpcm_ima_dk3); +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_DK4, adpcm_ima_dk4); +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_WS, adpcm_ima_ws); +ADPCM_CODEC(CODEC_ID_ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg); +ADPCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms); +ADPCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); +ADPCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); +ADPCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); +ADPCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); +ADPCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); +ADPCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); +ADPCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2); + +#undef ADPCM_CODEC diff --git a/mpeg4/src/libavcodec/adx.c b/mpeg4/src/libavcodec/adx.c new file mode 100644 index 0000000000000000000000000000000000000000..c841e4eb8cbf1858728178afa8829c4e61e87c48 --- /dev/null +++ b/mpeg4/src/libavcodec/adx.c @@ -0,0 +1,410 @@ +/* + * ADX ADPCM codecs + * Copyright (c) 2001,2003 BERO + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "avcodec.h" + +/** + * @file adx.c + * SEGA CRI adx codecs. + * + * Reference documents: + * http://ku-www.ss.titech.ac.jp/~yatsushi/adx.html + * adx2wav & wav2adx http://www.geocities.co.jp/Playtown/2004/ + */ + +typedef struct { + int s1,s2; +} PREV; + +typedef struct { + PREV prev[2]; + int header_parsed; + unsigned char dec_temp[18*2]; + unsigned short enc_temp[32*2]; + int in_temp; +} ADXContext; + +//#define BASEVOL 0x11e0 +#define BASEVOL 0x4000 +#define SCALE1 0x7298 +#define SCALE2 0x3350 + +#define CLIP(s) if (s>32767) s=32767; else if (s<-32768) s=-32768 + +/* 18 bytes <-> 32 samples */ + +#ifdef CONFIG_ENCODERS +static void adx_encode(unsigned char *adx,const short *wav,PREV *prev) +{ + int scale; + int i; + int s0,s1,s2,d; + int max=0; + int min=0; + int data[32]; + + s1 = prev->s1; + s2 = prev->s2; + for(i=0;i<32;i++) { + s0 = wav[i]; + d = ((s0<<14) - SCALE1*s1 + SCALE2*s2)/BASEVOL; + data[i]=d; + if (maxd) min=d; + s2 = s1; + s1 = s0; + } + prev->s1 = s1; + prev->s2 = s2; + + /* -8..+7 */ + + if (max==0 && min==0) { + memset(adx,0,18); + return; + } + + if (max/7>-min/8) scale = max/7; + else scale = -min/8; + + if (scale==0) scale=1; + + adx[0] = scale>>8; + adx[1] = scale; + + for(i=0;i<16;i++) { + adx[i+2] = ((data[i*2]/scale)<<4) | ((data[i*2+1]/scale)&0xf); + } +} +#endif //CONFIG_ENCODERS + +static void adx_decode(short *out,const unsigned char *in,PREV *prev) +{ + int scale = ((in[0]<<8)|(in[1])); + int i; + int s0,s1,s2,d; + +// printf("%x ",scale); + + in+=2; + s1 = prev->s1; + s2 = prev->s2; + for(i=0;i<16;i++) { + d = in[i]; + // d>>=4; if (d&8) d-=16; + d = ((signed char)d >> 4); + s0 = (BASEVOL*d*scale + SCALE1*s1 - SCALE2*s2)>>14; + CLIP(s0); + *out++=s0; + s2 = s1; + s1 = s0; + + d = in[i]; + 
//d&=15; if (d&8) d-=16; + d = ((signed char)(d<<4) >> 4); + s0 = (BASEVOL*d*scale + SCALE1*s1 - SCALE2*s2)>>14; + CLIP(s0); + *out++=s0; + s2 = s1; + s1 = s0; + } + prev->s1 = s1; + prev->s2 = s2; + +} + +static void adx_decode_stereo(short *out,const unsigned char *in,PREV *prev) +{ + short tmp[32*2]; + int i; + + adx_decode(tmp ,in ,prev); + adx_decode(tmp+32,in+18,prev+1); + for(i=0;i<32;i++) { + out[i*2] = tmp[i]; + out[i*2+1] = tmp[i+32]; + } +} + +#ifdef CONFIG_ENCODERS + +static void write_long(unsigned char *p,uint32_t v) +{ + p[0] = v>>24; + p[1] = v>>16; + p[2] = v>>8; + p[3] = v; +} + +static int adx_encode_header(AVCodecContext *avctx,unsigned char *buf,size_t bufsize) +{ +#if 0 + struct { + uint32_t offset; /* 0x80000000 + sample start - 4 */ + unsigned char unknown1[3]; /* 03 12 04 */ + unsigned char channel; /* 1 or 2 */ + uint32_t freq; + uint32_t size; + uint32_t unknown2; /* 01 f4 03 00 */ + uint32_t unknown3; /* 00 00 00 00 */ + uint32_t unknown4; /* 00 00 00 00 */ + + /* if loop + unknown3 00 15 00 01 + unknown4 00 00 00 01 + long loop_start_sample; + long loop_start_byte; + long loop_end_sample; + long loop_end_byte; + long + */ + } adxhdr; /* big endian */ + /* offset-6 "(c)CRI" */ +#endif + write_long(buf+0x00,0x80000000|0x20); + write_long(buf+0x04,0x03120400|avctx->channels); + write_long(buf+0x08,avctx->sample_rate); + write_long(buf+0x0c,0); /* FIXME: set after */ + write_long(buf+0x10,0x01040300); + write_long(buf+0x14,0x00000000); + write_long(buf+0x18,0x00000000); + memcpy(buf+0x1c,"\0\0(c)CRI",8); + return 0x20+4; +} + +static int adx_decode_init(AVCodecContext *avctx); +static int adx_encode_init(AVCodecContext *avctx) +{ + if (avctx->channels > 2) + return -1; /* only stereo or mono =) */ + avctx->frame_size = 32; + + avctx->coded_frame= avcodec_alloc_frame(); + avctx->coded_frame->key_frame= 1; + +// avctx->bit_rate = avctx->sample_rate*avctx->channels*18*8/32; + + av_log(avctx, AV_LOG_DEBUG, "adx encode init\n"); + 
adx_decode_init(avctx); + + return 0; +} + +static int adx_encode_close(AVCodecContext *avctx) +{ + av_freep(&avctx->coded_frame); + + return 0; +} + +static int adx_encode_frame(AVCodecContext *avctx, + uint8_t *frame, int buf_size, void *data) +{ + ADXContext *c = avctx->priv_data; + const short *samples = data; + unsigned char *dst = frame; + int rest = avctx->frame_size; + +/* + input data size = + ffmpeg.c: do_audio_out() + frame_bytes = enc->frame_size * 2 * enc->channels; +*/ + +// printf("sz=%d ",buf_size); fflush(stdout); + if (!c->header_parsed) { + int hdrsize = adx_encode_header(avctx,dst,buf_size); + dst+=hdrsize; + c->header_parsed = 1; + } + + if (avctx->channels==1) { + while(rest>=32) { + adx_encode(dst,samples,c->prev); + dst+=18; + samples+=32; + rest-=32; + } + } else { + while(rest>=32*2) { + short tmpbuf[32*2]; + int i; + + for(i=0;i<32;i++) { + tmpbuf[i] = samples[i*2]; + tmpbuf[i+32] = samples[i*2+1]; + } + + adx_encode(dst,tmpbuf,c->prev); + adx_encode(dst+18,tmpbuf+32,c->prev+1); + dst+=18*2; + samples+=32*2; + rest-=32*2; + } + } + return dst-frame; +} + +#endif //CONFIG_ENCODERS + +static uint32_t read_long(const unsigned char *p) +{ + return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; +} + +int is_adx(const unsigned char *buf,size_t bufsize) +{ + int offset; + + if (buf[0]!=0x80) return 0; + offset = (read_long(buf)^0x80000000)+4; + if (bufsizesample_rate = freq; + avctx->channels = channels; + avctx->bit_rate = freq*channels*18*8/32; +// avctx->frame_size = 18*channels; + + return offset; +} + +static int adx_decode_init(AVCodecContext * avctx) +{ + ADXContext *c = avctx->priv_data; + +// printf("adx_decode_init\n"); fflush(stdout); + c->prev[0].s1 = 0; + c->prev[0].s2 = 0; + c->prev[1].s1 = 0; + c->prev[1].s2 = 0; + c->header_parsed = 0; + c->in_temp = 0; + return 0; +} + +#if 0 +static void dump(unsigned char *buf,size_t len) +{ + int i; + for(i=0;ipriv_data; + short *samples = data; + const uint8_t *buf = buf0; + int rest = buf_size; + + 
if (!c->header_parsed) { + int hdrsize = adx_decode_header(avctx,buf,rest); + if (hdrsize==0) return -1; + c->header_parsed = 1; + buf += hdrsize; + rest -= hdrsize; + } + + if (c->in_temp) { + int copysize = 18*avctx->channels - c->in_temp; + memcpy(c->dec_temp+c->in_temp,buf,copysize); + rest -= copysize; + buf += copysize; + if (avctx->channels==1) { + adx_decode(samples,c->dec_temp,c->prev); + samples += 32; + } else { + adx_decode_stereo(samples,c->dec_temp,c->prev); + samples += 32*2; + } + } + // + if (avctx->channels==1) { + while(rest>=18) { + adx_decode(samples,buf,c->prev); + rest-=18; + buf+=18; + samples+=32; + } + } else { + while(rest>=18*2) { + adx_decode_stereo(samples,buf,c->prev); + rest-=18*2; + buf+=18*2; + samples+=32*2; + } + } + // + c->in_temp = rest; + if (rest) { + memcpy(c->dec_temp,buf,rest); + buf+=rest; + } + *data_size = (uint8_t*)samples - (uint8_t*)data; +// printf("%d:%d ",buf-buf0,*data_size); fflush(stdout); + return buf-buf0; +} + +#ifdef CONFIG_ENCODERS +AVCodec adx_adpcm_encoder = { + "adx_adpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_ADPCM_ADX, + sizeof(ADXContext), + adx_encode_init, + adx_encode_frame, + adx_encode_close, + NULL, +}; +#endif //CONFIG_ENCODERS + +AVCodec adx_adpcm_decoder = { + "adx_adpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_ADPCM_ADX, + sizeof(ADXContext), + adx_decode_init, + NULL, + NULL, + adx_decode_frame, +}; + diff --git a/mpeg4/src/libavcodec/alac.c b/mpeg4/src/libavcodec/alac.c new file mode 100644 index 0000000000000000000000000000000000000000..ab2ffb9e5938a880cb3780435b12cb738bd42099 --- /dev/null +++ b/mpeg4/src/libavcodec/alac.c @@ -0,0 +1,849 @@ +/* + * ALAC (Apple Lossless Audio Codec) decoder + * Copyright (c) 2005 David Hammerton + * All rights reserved. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file alac.c + * ALAC (Apple Lossless Audio Codec) decoder + * @author 2005 David Hammerton + * + * For more information on the ALAC format, visit: + * http://crazney.net/programs/itunes/alac.html + * + * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be + * passed through the extradata[_size] fields. This atom is tacked onto + * the end of an 'alac' stsd atom and has the following format: + * bytes 0-3 atom size (0x24), big-endian + * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) + * bytes 8-35 data bytes needed by decoder + * + * Extradata: + * 32bit size + * 32bit tag (=alac) + * 32bit zero? + * 32bit max sample per frame + * 8bit ?? (zero?) + * 8bit sample size + * 8bit history mult + * 8bit initial history + * 8bit kmodifier + * 8bit channels? + * 16bit ?? + * 32bit max coded frame size + * 32bit bitrate? 
+ * 32bit samplerate + */ + + +#include "avcodec.h" +#include "bitstream.h" + +#define ALAC_EXTRADATA_SIZE 36 + +typedef struct { + + AVCodecContext *avctx; + GetBitContext gb; + /* init to 0; first frame decode should initialize from extradata and + * set this to 1 */ + int context_initialized; + + int samplesize; + int numchannels; + int bytespersample; + + /* buffers */ + int32_t *predicterror_buffer_a; + int32_t *predicterror_buffer_b; + + int32_t *outputsamples_buffer_a; + int32_t *outputsamples_buffer_b; + + /* stuff from setinfo */ + uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ + uint8_t setinfo_7a; /* 0x00 */ + uint8_t setinfo_sample_size; /* 0x10 */ + uint8_t setinfo_rice_historymult; /* 0x28 */ + uint8_t setinfo_rice_initialhistory; /* 0x0a */ + uint8_t setinfo_rice_kmodifier; /* 0x0e */ + uint8_t setinfo_7f; /* 0x02 */ + uint16_t setinfo_80; /* 0x00ff */ + uint32_t setinfo_82; /* 0x000020e7 */ + uint32_t setinfo_86; /* 0x00069fe4 */ + uint32_t setinfo_8a_rate; /* 0x0000ac44 */ + /* end setinfo stuff */ + +} ALACContext; + +static void allocate_buffers(ALACContext *alac) +{ + alac->predicterror_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4); + alac->predicterror_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4); + + alac->outputsamples_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4); + alac->outputsamples_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4); +} + +static void alac_set_info(ALACContext *alac) +{ + unsigned char *ptr = alac->avctx->extradata; + + ptr += 4; /* size */ + ptr += 4; /* alac */ + ptr += 4; /* 0 ? */ + + alac->setinfo_max_samples_per_frame = BE_32(ptr); /* buffer size / 2 ? */ + ptr += 4; + alac->setinfo_7a = *ptr++; + alac->setinfo_sample_size = *ptr++; + alac->setinfo_rice_historymult = *ptr++; + alac->setinfo_rice_initialhistory = *ptr++; + alac->setinfo_rice_kmodifier = *ptr++; + alac->setinfo_7f = *ptr++; // channels? 
+ alac->setinfo_80 = BE_16(ptr); + ptr += 2; + alac->setinfo_82 = BE_32(ptr); // max coded frame size + ptr += 4; + alac->setinfo_86 = BE_32(ptr); // bitrate ? + ptr += 4; + alac->setinfo_8a_rate = BE_32(ptr); // samplerate + ptr += 4; + + allocate_buffers(alac); +} + +/* hideously inefficient. could use a bitmask search, + * alternatively bsr on x86, + */ +static int count_leading_zeros(int32_t input) +{ + int i = 0; + while (!(0x80000000 & input) && i < 32) { + i++; + input = input << 1; + } + return i; +} + +static void bastardized_rice_decompress(ALACContext *alac, + int32_t *output_buffer, + int output_size, + int readsamplesize, /* arg_10 */ + int rice_initialhistory, /* arg424->b */ + int rice_kmodifier, /* arg424->d */ + int rice_historymult, /* arg424->c */ + int rice_kmodifier_mask /* arg424->e */ + ) +{ + int output_count; + unsigned int history = rice_initialhistory; + int sign_modifier = 0; + + for (output_count = 0; output_count < output_size; output_count++) { + int32_t x = 0; + int32_t x_modified; + int32_t final_val; + + /* read x - number of 1s before 0 represent the rice */ + while (x <= 8 && get_bits1(&alac->gb)) { + x++; + } + + + if (x > 8) { /* RICE THRESHOLD */ + /* use alternative encoding */ + int32_t value; + + value = get_bits(&alac->gb, readsamplesize); + + /* mask value to readsamplesize size */ + if (readsamplesize != 32) + value &= (0xffffffff >> (32 - readsamplesize)); + + x = value; + } else { + /* standard rice encoding */ + int extrabits; + int k; /* size of extra bits */ + + /* read k, that is bits as is */ + k = 31 - rice_kmodifier - count_leading_zeros((history >> 9) + 3); + + if (k < 0) + k += rice_kmodifier; + else + k = rice_kmodifier; + + if (k != 1) { + extrabits = show_bits(&alac->gb, k); + + /* multiply x by 2^k - 1, as part of their strange algorithm */ + x = (x << k) - x; + + if (extrabits > 1) { + x += extrabits - 1; + get_bits(&alac->gb, k); + } else { + get_bits(&alac->gb, k - 1); + } + } + } + + x_modified = 
sign_modifier + x; + final_val = (x_modified + 1) / 2; + if (x_modified & 1) final_val *= -1; + + output_buffer[output_count] = final_val; + + sign_modifier = 0; + + /* now update the history */ + history += (x_modified * rice_historymult) + - ((history * rice_historymult) >> 9); + + if (x_modified > 0xffff) + history = 0xffff; + + /* special case: there may be compressed blocks of 0 */ + if ((history < 128) && (output_count+1 < output_size)) { + int block_size; + + sign_modifier = 1; + + x = 0; + while (x <= 8 && get_bits1(&alac->gb)) { + x++; + } + + if (x > 8) { + block_size = get_bits(&alac->gb, 16); + block_size &= 0xffff; + } else { + int k; + int extrabits; + + k = count_leading_zeros(history) + ((history + 16) >> 6 /* / 64 */) - 24; + + extrabits = show_bits(&alac->gb, k); + + block_size = (((1 << k) - 1) & rice_kmodifier_mask) * x + + extrabits - 1; + + if (extrabits < 2) { + x = 1 - extrabits; + block_size += x; + get_bits(&alac->gb, k - 1); + } else { + get_bits(&alac->gb, k); + } + } + + if (block_size > 0) { + memset(&output_buffer[output_count+1], 0, block_size * 4); + output_count += block_size; + + } + + if (block_size > 0xffff) + sign_modifier = 0; + + history = 0; + } + } +} + +#define SIGN_EXTENDED32(val, bits) ((val << (32 - bits)) >> (32 - bits)) + +#define SIGN_ONLY(v) \ + ((v < 0) ? (-1) : \ + ((v > 0) ? 
(1) : \ + (0))) + +static void predictor_decompress_fir_adapt(int32_t *error_buffer, + int32_t *buffer_out, + int output_size, + int readsamplesize, + int16_t *predictor_coef_table, + int predictor_coef_num, + int predictor_quantitization) +{ + int i; + + /* first sample always copies */ + *buffer_out = *error_buffer; + + if (!predictor_coef_num) { + if (output_size <= 1) return; + memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4); + return; + } + + if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */ + /* second-best case scenario for fir decompression, + * error describes a small difference from the previous sample only + */ + if (output_size <= 1) return; + for (i = 0; i < output_size - 1; i++) { + int32_t prev_value; + int32_t error_value; + + prev_value = buffer_out[i]; + error_value = error_buffer[i+1]; + buffer_out[i+1] = SIGN_EXTENDED32((prev_value + error_value), readsamplesize); + } + return; + } + + /* read warm-up samples */ + if (predictor_coef_num > 0) { + int i; + for (i = 0; i < predictor_coef_num; i++) { + int32_t val; + + val = buffer_out[i] + error_buffer[i+1]; + + val = SIGN_EXTENDED32(val, readsamplesize); + + buffer_out[i+1] = val; + } + } + +#if 0 + /* 4 and 8 are very common cases (the only ones i've seen). 
these + * should be unrolled and optimised + */ + if (predictor_coef_num == 4) { + /* FIXME: optimised general case */ + return; + } + + if (predictor_coef_table == 8) { + /* FIXME: optimised general case */ + return; + } +#endif + + + /* general case */ + if (predictor_coef_num > 0) { + for (i = predictor_coef_num + 1; + i < output_size; + i++) { + int j; + int sum = 0; + int outval; + int error_val = error_buffer[i]; + + for (j = 0; j < predictor_coef_num; j++) { + sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) * + predictor_coef_table[j]; + } + + outval = (1 << (predictor_quantitization-1)) + sum; + outval = outval >> predictor_quantitization; + outval = outval + buffer_out[0] + error_val; + outval = SIGN_EXTENDED32(outval, readsamplesize); + + buffer_out[predictor_coef_num+1] = outval; + + if (error_val > 0) { + int predictor_num = predictor_coef_num - 1; + + while (predictor_num >= 0 && error_val > 0) { + int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; + int sign = SIGN_ONLY(val); + + predictor_coef_table[predictor_num] -= sign; + + val *= sign; /* absolute value */ + + error_val -= ((val >> predictor_quantitization) * + (predictor_coef_num - predictor_num)); + + predictor_num--; + } + } else if (error_val < 0) { + int predictor_num = predictor_coef_num - 1; + + while (predictor_num >= 0 && error_val < 0) { + int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; + int sign = - SIGN_ONLY(val); + + predictor_coef_table[predictor_num] -= sign; + + val *= sign; /* neg value */ + + error_val -= ((val >> predictor_quantitization) * + (predictor_coef_num - predictor_num)); + + predictor_num--; + } + } + + buffer_out++; + } + } +} + +void deinterlace_16(int32_t *buffer_a, int32_t *buffer_b, + int16_t *buffer_out, + int numchannels, int numsamples, + uint8_t interlacing_shift, + uint8_t interlacing_leftweight) +{ + int i; + if (numsamples <= 0) return; + + /* weighted interlacing */ + if (interlacing_leftweight) 
{ + for (i = 0; i < numsamples; i++) { + int32_t difference, midright; + int16_t left; + int16_t right; + + midright = buffer_a[i]; + difference = buffer_b[i]; + + + right = midright - ((difference * interlacing_leftweight) >> interlacing_shift); + left = (midright - ((difference * interlacing_leftweight) >> interlacing_shift)) + + difference; + + buffer_out[i*numchannels] = left; + buffer_out[i*numchannels + 1] = right; + } + + return; + } + + /* otherwise basic interlacing took place */ + for (i = 0; i < numsamples; i++) { + int16_t left, right; + + left = buffer_a[i]; + right = buffer_b[i]; + + buffer_out[i*numchannels] = left; + buffer_out[i*numchannels + 1] = right; + } +} + +static int alac_decode_frame(AVCodecContext *avctx, + void *outbuffer, int *outputsize, + uint8_t *inbuffer, int input_buffer_size) +{ + ALACContext *alac = avctx->priv_data; + + int channels; + int32_t outputsamples; + + /* short-circuit null buffers */ + if (!inbuffer || !input_buffer_size) + return input_buffer_size; + + /* initialize from the extradata */ + if (!alac->context_initialized) { + if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { + av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", + ALAC_EXTRADATA_SIZE); + return input_buffer_size; + } + alac_set_info(alac); + alac->context_initialized = 1; + } + + outputsamples = alac->setinfo_max_samples_per_frame; + + init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8); + + channels = get_bits(&alac->gb, 3); + + *outputsize = outputsamples * alac->bytespersample; + + switch(channels) { + case 0: { /* 1 channel */ + int hassize; + int isnotcompressed; + int readsamplesize; + + int wasted_bytes; + int ricemodifier; + + + /* 2^result = something to do with output waiting. + * perhaps matters if we read > 1 frame in a pass? 
+ */ + get_bits(&alac->gb, 4); + + get_bits(&alac->gb, 12); /* unknown, skip 12 bits */ + + hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */ + + wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */ + + isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */ + + if (hassize) { + /* now read the number of samples, + * as a 32bit integer */ + outputsamples = get_bits(&alac->gb, 32); + *outputsize = outputsamples * alac->bytespersample; + } + + readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8); + + if (!isnotcompressed) { + /* so it is compressed */ + int16_t predictor_coef_table[32]; + int predictor_coef_num; + int prediction_type; + int prediction_quantitization; + int i; + + /* FIXME: skip 16 bits, not sure what they are. seem to be used in + * two channel case */ + get_bits(&alac->gb, 8); + get_bits(&alac->gb, 8); + + prediction_type = get_bits(&alac->gb, 4); + prediction_quantitization = get_bits(&alac->gb, 4); + + ricemodifier = get_bits(&alac->gb, 3); + predictor_coef_num = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num; i++) { + predictor_coef_table[i] = (int16_t)get_bits(&alac->gb, 16); + } + + if (wasted_bytes) { + /* these bytes seem to have something to do with + * > 2 channel files. 
+ */ + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + } + + bastardized_rice_decompress(alac, + alac->predicterror_buffer_a, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier * alac->setinfo_rice_historymult / 4, + (1 << alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_a, + alac->outputsamples_buffer_a, + outputsamples, + readsamplesize, + predictor_coef_table, + predictor_coef_num, + prediction_quantitization); + } else { + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type); + /* i think the only other prediction type (or perhaps this is just a + * boolean?) runs adaptive fir twice.. like: + * predictor_decompress_fir_adapt(predictor_error, tempout, ...) + * predictor_decompress_fir_adapt(predictor_error, outputsamples ...) + * little strange.. + */ + } + + } else { + /* not compressed, easy case */ + if (readsamplesize <= 16) { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits = get_bits(&alac->gb, readsamplesize); + + audiobits = SIGN_EXTENDED32(audiobits, readsamplesize); + + alac->outputsamples_buffer_a[i] = audiobits; + } + } else { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits; + + audiobits = get_bits(&alac->gb, 16); + /* special case of sign extension.. 
+ * as we'll be ORing the low 16bits into this */ + audiobits = audiobits << 16; + audiobits = audiobits >> (32 - readsamplesize); + + audiobits |= get_bits(&alac->gb, readsamplesize - 16); + + alac->outputsamples_buffer_a[i] = audiobits; + } + } + /* wasted_bytes = 0; // unused */ + } + + switch(alac->setinfo_sample_size) { + case 16: { + int i; + for (i = 0; i < outputsamples; i++) { + int16_t sample = alac->outputsamples_buffer_a[i]; + sample = be2me_16(sample); + ((int16_t*)outbuffer)[i * alac->numchannels] = sample; + } + break; + } + case 20: + case 24: + case 32: + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + break; + default: + break; + } + break; + } + case 1: { /* 2 channels */ + int hassize; + int isnotcompressed; + int readsamplesize; + + int wasted_bytes; + + uint8_t interlacing_shift; + uint8_t interlacing_leftweight; + + /* 2^result = something to do with output waiting. + * perhaps matters if we read > 1 frame in a pass? + */ + get_bits(&alac->gb, 4); + + get_bits(&alac->gb, 12); /* unknown, skip 12 bits */ + + hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */ + + wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? 
*/ + + isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */ + + if (hassize) { + /* now read the number of samples, + * as a 32bit integer */ + outputsamples = get_bits(&alac->gb, 32); + *outputsize = outputsamples * alac->bytespersample; + } + + readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + 1; + + if (!isnotcompressed) { + /* compressed */ + int16_t predictor_coef_table_a[32]; + int predictor_coef_num_a; + int prediction_type_a; + int prediction_quantitization_a; + int ricemodifier_a; + + int16_t predictor_coef_table_b[32]; + int predictor_coef_num_b; + int prediction_type_b; + int prediction_quantitization_b; + int ricemodifier_b; + + int i; + + interlacing_shift = get_bits(&alac->gb, 8); + interlacing_leftweight = get_bits(&alac->gb, 8); + + /******** channel 1 ***********/ + prediction_type_a = get_bits(&alac->gb, 4); + prediction_quantitization_a = get_bits(&alac->gb, 4); + + ricemodifier_a = get_bits(&alac->gb, 3); + predictor_coef_num_a = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num_a; i++) { + predictor_coef_table_a[i] = (int16_t)get_bits(&alac->gb, 16); + } + + /******** channel 2 *********/ + prediction_type_b = get_bits(&alac->gb, 4); + prediction_quantitization_b = get_bits(&alac->gb, 4); + + ricemodifier_b = get_bits(&alac->gb, 3); + predictor_coef_num_b = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num_b; i++) { + predictor_coef_table_b[i] = (int16_t)get_bits(&alac->gb, 16); + } + + /*********************/ + if (wasted_bytes) { + /* see mono case */ + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + } + + /* channel 1 */ + bastardized_rice_decompress(alac, + alac->predicterror_buffer_a, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier_a * alac->setinfo_rice_historymult / 4, + (1 << 
alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type_a == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_a, + alac->outputsamples_buffer_a, + outputsamples, + readsamplesize, + predictor_coef_table_a, + predictor_coef_num_a, + prediction_quantitization_a); + } else { + /* see mono case */ + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_a); + } + + /* channel 2 */ + bastardized_rice_decompress(alac, + alac->predicterror_buffer_b, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier_b * alac->setinfo_rice_historymult / 4, + (1 << alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type_b == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_b, + alac->outputsamples_buffer_b, + outputsamples, + readsamplesize, + predictor_coef_table_b, + predictor_coef_num_b, + prediction_quantitization_b); + } else { + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b); + } + } else { + /* not compressed, easy case */ + if (alac->setinfo_sample_size <= 16) { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits_a, audiobits_b; + + audiobits_a = get_bits(&alac->gb, alac->setinfo_sample_size); + audiobits_b = get_bits(&alac->gb, alac->setinfo_sample_size); + + audiobits_a = SIGN_EXTENDED32(audiobits_a, alac->setinfo_sample_size); + audiobits_b = SIGN_EXTENDED32(audiobits_b, alac->setinfo_sample_size); + + alac->outputsamples_buffer_a[i] = audiobits_a; + alac->outputsamples_buffer_b[i] = audiobits_b; + } + } else { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits_a, audiobits_b; + + audiobits_a = get_bits(&alac->gb, 16); + audiobits_a = audiobits_a << 16; + audiobits_a = audiobits_a >> (32 - alac->setinfo_sample_size); + audiobits_a |= get_bits(&alac->gb, alac->setinfo_sample_size - 16); + + audiobits_b = get_bits(&alac->gb, 16); 
+ audiobits_b = audiobits_b << 16; + audiobits_b = audiobits_b >> (32 - alac->setinfo_sample_size); + audiobits_b |= get_bits(&alac->gb, alac->setinfo_sample_size - 16); + + alac->outputsamples_buffer_a[i] = audiobits_a; + alac->outputsamples_buffer_b[i] = audiobits_b; + } + } + /* wasted_bytes = 0; */ + interlacing_shift = 0; + interlacing_leftweight = 0; + } + + switch(alac->setinfo_sample_size) { + case 16: { + deinterlace_16(alac->outputsamples_buffer_a, + alac->outputsamples_buffer_b, + (int16_t*)outbuffer, + alac->numchannels, + outputsamples, + interlacing_shift, + interlacing_leftweight); + break; + } + case 20: + case 24: + case 32: + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + break; + default: + break; + } + + break; + } + } + + return input_buffer_size; +} + +static int alac_decode_init(AVCodecContext * avctx) +{ + ALACContext *alac = avctx->priv_data; + alac->avctx = avctx; + alac->context_initialized = 0; + + alac->samplesize = alac->avctx->bits_per_sample; + alac->numchannels = alac->avctx->channels; + alac->bytespersample = (alac->samplesize / 8) * alac->numchannels; + + return 0; +} + +static int alac_decode_close(AVCodecContext *avctx) +{ + ALACContext *alac = avctx->priv_data; + + av_free(alac->predicterror_buffer_a); + av_free(alac->predicterror_buffer_b); + + av_free(alac->outputsamples_buffer_a); + av_free(alac->outputsamples_buffer_b); + + return 0; +} + +AVCodec alac_decoder = { + "alac", + CODEC_TYPE_AUDIO, + CODEC_ID_ALAC, + sizeof(ALACContext), + alac_decode_init, + NULL, + alac_decode_close, + alac_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/allcodecs.c b/mpeg4/src/libavcodec/allcodecs.c new file mode 100644 index 0000000000000000000000000000000000000000..f2d752095e4efb0b406a52fce004225df23bd666 --- /dev/null +++ b/mpeg4/src/libavcodec/allcodecs.c @@ -0,0 +1,655 @@ +/* + * Utils for libavcodec + * Copyright (c) 2002 Fabrice Bellard. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file allcodecs.c + * Utils for libavcodec. + */ + +#include "avcodec.h" + +/* If you do not call this function, then you can select exactly which + formats you want to support */ + +/** + * simple call to register all the codecs. + */ +void avcodec_register_all(void) +{ + static int inited = 0; + + if (inited != 0) + return; + inited = 1; + + /* encoders */ +#ifdef CONFIG_ENCODERS +#ifdef CONFIG_AC3_ENCODER + register_avcodec(&ac3_encoder); +#endif //CONFIG_AC3_ENCODER +#ifdef CONFIG_MP2_ENCODER + register_avcodec(&mp2_encoder); +#endif //CONFIG_MP2_ENCODER +#ifdef CONFIG_MP3LAME +#ifdef CONFIG_MP3LAME_ENCODER + register_avcodec(&mp3lame_encoder); +#endif //CONFIG_MP3LAME_ENCODER +#endif +#ifdef CONFIG_LIBVORBIS +#ifdef CONFIG_OGGVORBIS_ENCODER + register_avcodec(&oggvorbis_encoder); +#endif //CONFIG_OGGVORBIS_ENCODER +#if (defined CONFIG_OGGVORBIS_DECODER && !defined CONFIG_VORBIS_DECODER) + register_avcodec(&oggvorbis_decoder); +#endif //CONFIG_OGGVORBIS_DECODER +#endif +#ifdef CONFIG_LIBTHEORA +#ifdef CONFIG_OGGTHEORA_ENCODER +// register_avcodec(&oggtheora_encoder); +#endif //CONFIG_OGGTHEORA_ENCODER +#ifdef CONFIG_OGGTHEORA_DECODER + register_avcodec(&oggtheora_decoder); +#endif //CONFIG_OGGTHEORA_DECODER 
+#endif +#ifdef CONFIG_FAAC +#ifdef CONFIG_FAAC_ENCODER + register_avcodec(&faac_encoder); +#endif //CONFIG_FAAC_ENCODER +#endif +#ifdef CONFIG_XVID +#ifdef CONFIG_XVID_ENCODER + register_avcodec(&xvid_encoder); +#endif //CONFIG_XVID_ENCODER +#endif +#ifdef CONFIG_MPEG1VIDEO_ENCODER + register_avcodec(&mpeg1video_encoder); +#endif //CONFIG_MPEG1VIDEO_ENCODER +#ifdef CONFIG_H264_ENCODER +// register_avcodec(&h264_encoder); +#endif //CONFIG_H264_ENCODER +#ifdef CONFIG_MPEG2VIDEO_ENCODER + register_avcodec(&mpeg2video_encoder); +#endif //CONFIG_MPEG2VIDEO_ENCODER +#ifdef CONFIG_H261_ENCODER + register_avcodec(&h261_encoder); +#endif //CONFIG_H261_ENCODER +#ifdef CONFIG_H263_ENCODER + register_avcodec(&h263_encoder); +#endif //CONFIG_H263_ENCODER +#ifdef CONFIG_H263P_ENCODER + register_avcodec(&h263p_encoder); +#endif //CONFIG_H263P_ENCODER +#ifdef CONFIG_FLV_ENCODER + register_avcodec(&flv_encoder); +#endif //CONFIG_FLV_ENCODER +#ifdef CONFIG_RV10_ENCODER + register_avcodec(&rv10_encoder); +#endif //CONFIG_RV10_ENCODER +#ifdef CONFIG_RV20_ENCODER + register_avcodec(&rv20_encoder); +#endif //CONFIG_RV20_ENCODER +#ifdef CONFIG_MPEG4_ENCODER + register_avcodec(&mpeg4_encoder); +#endif //CONFIG_MPEG4_ENCODER +#ifdef CONFIG_MSMPEG4V1_ENCODER + register_avcodec(&msmpeg4v1_encoder); +#endif //CONFIG_MSMPEG4V1_ENCODER +#ifdef CONFIG_MSMPEG4V2_ENCODER + register_avcodec(&msmpeg4v2_encoder); +#endif //CONFIG_MSMPEG4V2_ENCODER +#ifdef CONFIG_MSMPEG4V3_ENCODER + register_avcodec(&msmpeg4v3_encoder); +#endif //CONFIG_MSMPEG4V3_ENCODER +#ifdef CONFIG_WMV1_ENCODER + register_avcodec(&wmv1_encoder); +#endif //CONFIG_WMV1_ENCODER +#ifdef CONFIG_WMV2_ENCODER + register_avcodec(&wmv2_encoder); +#endif //CONFIG_WMV2_ENCODER +#ifdef CONFIG_SVQ1_ENCODER + register_avcodec(&svq1_encoder); +#endif //CONFIG_SVQ1_ENCODER +#ifdef CONFIG_MJPEG_ENCODER + register_avcodec(&mjpeg_encoder); +#endif //CONFIG_MJPEG_ENCODER +#ifdef CONFIG_LJPEG_ENCODER + register_avcodec(&ljpeg_encoder); +#endif 
//CONFIG_LJPEG_ENCODER +#ifdef CONFIG_JPEGLS_ENCODER + register_avcodec(&jpegls_encoder); +#endif //CONFIG_JPEGLS_ENCODER +#ifdef CONFIG_ZLIB +#ifdef CONFIG_PNG_ENCODER + register_avcodec(&png_encoder); +#endif //CONFIG_PNG_ENCODER +#endif +#ifdef CONFIG_PPM_ENCODER + register_avcodec(&ppm_encoder); +#endif //CONFIG_PPM_ENCODER +#ifdef CONFIG_PGM_ENCODER + register_avcodec(&pgm_encoder); +#endif //CONFIG_PGM_ENCODER +#ifdef CONFIG_PGMYUV_ENCODER + register_avcodec(&pgmyuv_encoder); +#endif //CONFIG_PGMYUV_ENCODER +#ifdef CONFIG_PBM_ENCODER + register_avcodec(&pbm_encoder); +#endif //CONFIG_PBM_ENCODER +#ifdef CONFIG_PAM_ENCODER + register_avcodec(&pam_encoder); +#endif //CONFIG_PAM_ENCODER +#ifdef CONFIG_HUFFYUV_ENCODER + register_avcodec(&huffyuv_encoder); +#endif //CONFIG_HUFFYUV_ENCODER +#ifdef CONFIG_FFVHUFF_ENCODER + register_avcodec(&ffvhuff_encoder); +#endif //CONFIG_FFVHUFF_ENCODER +#ifdef CONFIG_ASV1_ENCODER + register_avcodec(&asv1_encoder); +#endif //CONFIG_ASV1_ENCODER +#ifdef CONFIG_ASV2_ENCODER + register_avcodec(&asv2_encoder); +#endif //CONFIG_ASV2_ENCODER +#ifdef CONFIG_FFV1_ENCODER + register_avcodec(&ffv1_encoder); +#endif //CONFIG_FFV1_ENCODER +#ifdef CONFIG_SNOW_ENCODER + register_avcodec(&snow_encoder); +#endif //CONFIG_SNOW_ENCODER +#ifdef CONFIG_ZLIB_ENCODER + register_avcodec(&zlib_encoder); +#endif //CONFIG_ZLIB_ENCODER +#ifdef CONFIG_DVVIDEO_ENCODER + register_avcodec(&dvvideo_encoder); +#endif //CONFIG_DVVIDEO_ENCODER +#ifdef CONFIG_SONIC_ENCODER + register_avcodec(&sonic_encoder); +#endif //CONFIG_SONIC_ENCODER +#ifdef CONFIG_SONIC_LS_ENCODER + register_avcodec(&sonic_ls_encoder); +#endif //CONFIG_SONIC_LS_ENCODER +#ifdef CONFIG_X264 +#ifdef CONFIG_X264_ENCODER + register_avcodec(&x264_encoder); +#endif //CONFIG_X264_ENCODER +#endif +#ifdef CONFIG_LIBGSM + register_avcodec(&libgsm_encoder); +#endif //CONFIG_LIBGSM +#ifdef CONFIG_RAWVIDEO_ENCODER + register_avcodec(&rawvideo_encoder); +#endif //CONFIG_RAWVIDEO_ENCODER +#endif /* 
CONFIG_ENCODERS */ + + /* decoders */ +#ifdef CONFIG_DECODERS +#ifdef CONFIG_H263_DECODER + register_avcodec(&h263_decoder); +#endif //CONFIG_H263_DECODER +#ifdef CONFIG_H261_DECODER + register_avcodec(&h261_decoder); +#endif //CONFIG_H261_DECODER +#ifdef CONFIG_MPEG4_DECODER + register_avcodec(&mpeg4_decoder); +#endif //CONFIG_MPEG4_DECODER +#ifdef CONFIG_MSMPEG4V1_DECODER + register_avcodec(&msmpeg4v1_decoder); +#endif //CONFIG_MSMPEG4V1_DECODER +#ifdef CONFIG_MSMPEG4V2_DECODER + register_avcodec(&msmpeg4v2_decoder); +#endif //CONFIG_MSMPEG4V2_DECODER +#ifdef CONFIG_MSMPEG4V3_DECODER + register_avcodec(&msmpeg4v3_decoder); +#endif //CONFIG_MSMPEG4V3_DECODER +#ifdef CONFIG_WMV1_DECODER + register_avcodec(&wmv1_decoder); +#endif //CONFIG_WMV1_DECODER +#ifdef CONFIG_WMV2_DECODER + register_avcodec(&wmv2_decoder); +#endif //CONFIG_WMV2_DECODER +#ifdef CONFIG_VC9_DECODER + register_avcodec(&vc9_decoder); +#endif //CONFIG_VC9_DECODER +/* Reenable when it stops crashing on every file, causing bug report spam. 
+#ifdef CONFIG_WMV3_DECODER + register_avcodec(&wmv3_decoder); +#endif //CONFIG_WMV3_DECODER +*/ +#ifdef CONFIG_H263I_DECODER + register_avcodec(&h263i_decoder); +#endif //CONFIG_H263I_DECODER +#ifdef CONFIG_FLV_DECODER + register_avcodec(&flv_decoder); +#endif //CONFIG_FLV_DECODER +#ifdef CONFIG_RV10_DECODER + register_avcodec(&rv10_decoder); +#endif //CONFIG_RV10_DECODER +#ifdef CONFIG_RV20_DECODER + register_avcodec(&rv20_decoder); +#endif //CONFIG_RV20_DECODER +#ifdef CONFIG_SVQ1_DECODER + register_avcodec(&svq1_decoder); +#endif //CONFIG_SVQ1_DECODER +#ifdef CONFIG_SVQ3_DECODER + register_avcodec(&svq3_decoder); +#endif //CONFIG_SVQ3_DECODER +#ifdef CONFIG_WMAV1_DECODER + register_avcodec(&wmav1_decoder); +#endif //CONFIG_WMAV1_DECODER +#ifdef CONFIG_WMAV2_DECODER + register_avcodec(&wmav2_decoder); +#endif //CONFIG_WMAV2_DECODER +#ifdef CONFIG_INDEO2_DECODER + register_avcodec(&indeo2_decoder); +#endif //CONFIG_INDEO2_DECODER +#ifdef CONFIG_INDEO3_DECODER + register_avcodec(&indeo3_decoder); +#endif //CONFIG_INDEO3_DECODER +#ifdef CONFIG_TSCC_DECODER + register_avcodec(&tscc_decoder); +#endif //CONFIG_TSCC_DECODER +#ifdef CONFIG_CSCD_DECODER + register_avcodec(&cscd_decoder); +#endif //CONFIG_CSCD_DECODER +#ifdef CONFIG_NUV_DECODER + register_avcodec(&nuv_decoder); +#endif //CONFIG_NUV_DECODER +#ifdef CONFIG_ULTI_DECODER + register_avcodec(&ulti_decoder); +#endif //CONFIG_ULTI_DECODER +#ifdef CONFIG_QDRAW_DECODER + register_avcodec(&qdraw_decoder); +#endif //CONFIG_QDRAW_DECODER +#ifdef CONFIG_XL_DECODER + register_avcodec(&xl_decoder); +#endif //CONFIG_XL_DECODER +#ifdef CONFIG_QPEG_DECODER + register_avcodec(&qpeg_decoder); +#endif //CONFIG_QPEG_DECODER +#ifdef CONFIG_LOCO_DECODER + register_avcodec(&loco_decoder); +#endif //CONFIG_LOCO_DECODER +#ifdef CONFIG_KMVC_DECODER + register_avcodec(&kmvc_decoder); +#endif //CONFIG_KMVC_DECODER +#ifdef CONFIG_WNV1_DECODER + register_avcodec(&wnv1_decoder); +#endif //CONFIG_WNV1_DECODER +#ifdef CONFIG_AASC_DECODER + 
register_avcodec(&aasc_decoder); +#endif //CONFIG_AASC_DECODER +#ifdef CONFIG_FRAPS_DECODER + register_avcodec(&fraps_decoder); +#endif //CONFIG_FRAPS_DECODER +#ifdef CONFIG_FAAD +#ifdef CONFIG_AAC_DECODER + register_avcodec(&aac_decoder); +#endif //CONFIG_AAC_DECODER +#ifdef CONFIG_MPEG4AAC_DECODER + register_avcodec(&mpeg4aac_decoder); +#endif //CONFIG_MPEG4AAC_DECODER +#endif +#ifdef CONFIG_MPEG1VIDEO_DECODER + register_avcodec(&mpeg1video_decoder); +#endif //CONFIG_MPEG1VIDEO_DECODER +#ifdef CONFIG_MPEG2VIDEO_DECODER + register_avcodec(&mpeg2video_decoder); +#endif //CONFIG_MPEG2VIDEO_DECODER +#ifdef CONFIG_MPEGVIDEO_DECODER + register_avcodec(&mpegvideo_decoder); +#endif //CONFIG_MPEGVIDEO_DECODER +#ifdef HAVE_XVMC +#ifdef CONFIG_MPEG_XVMC_DECODER + register_avcodec(&mpeg_xvmc_decoder); +#endif //CONFIG_MPEG_XVMC_DECODER +#endif +#ifdef CONFIG_DVVIDEO_DECODER + register_avcodec(&dvvideo_decoder); +#endif //CONFIG_DVVIDEO_DECODER +#ifdef CONFIG_MJPEG_DECODER + register_avcodec(&mjpeg_decoder); +#endif //CONFIG_MJPEG_DECODER +#ifdef CONFIG_MJPEGB_DECODER + register_avcodec(&mjpegb_decoder); +#endif //CONFIG_MJPEGB_DECODER +#ifdef CONFIG_SP5X_DECODER + register_avcodec(&sp5x_decoder); +#endif //CONFIG_SP5X_DECODER +#ifdef CONFIG_ZLIB +#ifdef CONFIG_PNG_DECODER + register_avcodec(&png_decoder); +#endif //CONFIG_PNG_DECODER +#endif +#ifdef CONFIG_MP2_DECODER + register_avcodec(&mp2_decoder); +#endif //CONFIG_MP2_DECODER +#ifdef CONFIG_MP3_DECODER + register_avcodec(&mp3_decoder); +#endif //CONFIG_MP3_DECODER +#ifdef CONFIG_MP3ADU_DECODER + register_avcodec(&mp3adu_decoder); +#endif //CONFIG_MP3ADU_DECODER +#ifdef CONFIG_MP3ON4_DECODER + register_avcodec(&mp3on4_decoder); +#endif //CONFIG_MP3ON4_DECODER +#ifdef CONFIG_MACE3_DECODER + register_avcodec(&mace3_decoder); +#endif //CONFIG_MACE3_DECODER +#ifdef CONFIG_MACE6_DECODER + register_avcodec(&mace6_decoder); +#endif //CONFIG_MACE6_DECODER +#ifdef CONFIG_HUFFYUV_DECODER + register_avcodec(&huffyuv_decoder); 
+#endif //CONFIG_HUFFYUV_DECODER +#ifdef CONFIG_FFVHUFF_DECODER + register_avcodec(&ffvhuff_decoder); +#endif //CONFIG_FFVHUFF_DECODER +#ifdef CONFIG_FFV1_DECODER + register_avcodec(&ffv1_decoder); +#endif //CONFIG_FFV1_DECODER +#ifdef CONFIG_SNOW_DECODER + register_avcodec(&snow_decoder); +#endif //CONFIG_SNOW_DECODER +#ifdef CONFIG_CYUV_DECODER + register_avcodec(&cyuv_decoder); +#endif //CONFIG_CYUV_DECODER +#ifdef CONFIG_H264_DECODER + register_avcodec(&h264_decoder); +#endif //CONFIG_H264_DECODER +#ifdef CONFIG_VP3_DECODER + register_avcodec(&vp3_decoder); +#endif //CONFIG_VP3_DECODER +#if (defined CONFIG_THEORA_DECODER && !defined CONFIG_LIBTHEORA) + register_avcodec(&theora_decoder); +#endif //CONFIG_THEORA_DECODER +#ifdef CONFIG_ASV1_DECODER + register_avcodec(&asv1_decoder); +#endif //CONFIG_ASV1_DECODER +#ifdef CONFIG_ASV2_DECODER + register_avcodec(&asv2_decoder); +#endif //CONFIG_ASV2_DECODER +#ifdef CONFIG_VCR1_DECODER + register_avcodec(&vcr1_decoder); +#endif //CONFIG_VCR1_DECODER +#ifdef CONFIG_CLJR_DECODER + register_avcodec(&cljr_decoder); +#endif //CONFIG_CLJR_DECODER +#ifdef CONFIG_FOURXM_DECODER + register_avcodec(&fourxm_decoder); +#endif //CONFIG_FOURXM_DECODER +#ifdef CONFIG_MDEC_DECODER + register_avcodec(&mdec_decoder); +#endif //CONFIG_MDEC_DECODER +#ifdef CONFIG_ROQ_DECODER + register_avcodec(&roq_decoder); +#endif //CONFIG_ROQ_DECODER +#ifdef CONFIG_INTERPLAY_VIDEO_DECODER + register_avcodec(&interplay_video_decoder); +#endif //CONFIG_INTERPLAY_VIDEO_DECODER +#ifdef CONFIG_XAN_WC3_DECODER + register_avcodec(&xan_wc3_decoder); +#endif //CONFIG_XAN_WC3_DECODER +#ifdef CONFIG_RPZA_DECODER + register_avcodec(&rpza_decoder); +#endif //CONFIG_RPZA_DECODER +#ifdef CONFIG_CINEPAK_DECODER + register_avcodec(&cinepak_decoder); +#endif //CONFIG_CINEPAK_DECODER +#ifdef CONFIG_MSRLE_DECODER + register_avcodec(&msrle_decoder); +#endif //CONFIG_MSRLE_DECODER +#ifdef CONFIG_MSVIDEO1_DECODER + register_avcodec(&msvideo1_decoder); +#endif 
//CONFIG_MSVIDEO1_DECODER +#ifdef CONFIG_VQA_DECODER + register_avcodec(&vqa_decoder); +#endif //CONFIG_VQA_DECODER +#ifdef CONFIG_IDCIN_DECODER + register_avcodec(&idcin_decoder); +#endif //CONFIG_IDCIN_DECODER +#ifdef CONFIG_EIGHTBPS_DECODER + register_avcodec(&eightbps_decoder); +#endif //CONFIG_EIGHTBPS_DECODER +#ifdef CONFIG_SMC_DECODER + register_avcodec(&smc_decoder); +#endif //CONFIG_SMC_DECODER +#ifdef CONFIG_FLIC_DECODER + register_avcodec(&flic_decoder); +#endif //CONFIG_FLIC_DECODER +#ifdef CONFIG_TRUEMOTION1_DECODER + register_avcodec(&truemotion1_decoder); +#endif //CONFIG_TRUEMOTION1_DECODER +#ifdef CONFIG_TRUEMOTION2_DECODER + register_avcodec(&truemotion2_decoder); +#endif //CONFIG_TRUEMOTION2_DECODER +#ifdef CONFIG_VMDVIDEO_DECODER + register_avcodec(&vmdvideo_decoder); +#endif //CONFIG_VMDVIDEO_DECODER +#ifdef CONFIG_VMDAUDIO_DECODER + register_avcodec(&vmdaudio_decoder); +#endif //CONFIG_VMDAUDIO_DECODER +#ifdef CONFIG_MSZH_DECODER + register_avcodec(&mszh_decoder); +#endif //CONFIG_MSZH_DECODER +#ifdef CONFIG_ZLIB_DECODER + register_avcodec(&zlib_decoder); +#endif //CONFIG_ZLIB_DECODER +#ifdef CONFIG_ZMBV_DECODER + register_avcodec(&zmbv_decoder); +#endif //CONFIG_ZMBV_DECODER +#ifdef CONFIG_SMACKER_DECODER + register_avcodec(&smacker_decoder); +#endif //CONFIG_SMACKER_DECODER +#ifdef CONFIG_SMACKAUD_DECODER + register_avcodec(&smackaud_decoder); +#endif //CONFIG_SMACKAUD_DECODER +#ifdef CONFIG_SONIC_DECODER + register_avcodec(&sonic_decoder); +#endif //CONFIG_SONIC_DECODER +#ifdef CONFIG_AC3 +#ifdef CONFIG_AC3_DECODER + register_avcodec(&ac3_decoder); +#endif //CONFIG_AC3_DECODER +#endif +#ifdef CONFIG_DTS +#ifdef CONFIG_DTS_DECODER + register_avcodec(&dts_decoder); +#endif //CONFIG_DTS_DECODER +#endif +#ifdef CONFIG_RA_144_DECODER + register_avcodec(&ra_144_decoder); +#endif //CONFIG_RA_144_DECODER +#ifdef CONFIG_RA_288_DECODER + register_avcodec(&ra_288_decoder); +#endif //CONFIG_RA_288_DECODER +#ifdef CONFIG_ROQ_DPCM_DECODER + 
register_avcodec(&roq_dpcm_decoder); +#endif //CONFIG_ROQ_DPCM_DECODER +#ifdef CONFIG_INTERPLAY_DPCM_DECODER + register_avcodec(&interplay_dpcm_decoder); +#endif //CONFIG_INTERPLAY_DPCM_DECODER +#ifdef CONFIG_XAN_DPCM_DECODER + register_avcodec(&xan_dpcm_decoder); +#endif //CONFIG_XAN_DPCM_DECODER +#ifdef CONFIG_SOL_DPCM_DECODER + register_avcodec(&sol_dpcm_decoder); +#endif //CONFIG_SOL_DPCM_DECODER +#ifdef CONFIG_QTRLE_DECODER + register_avcodec(&qtrle_decoder); +#endif //CONFIG_QTRLE_DECODER +#ifdef CONFIG_FLAC_DECODER + register_avcodec(&flac_decoder); +#endif //CONFIG_FLAC_DECODER +#ifdef CONFIG_SHORTEN_DECODER + register_avcodec(&shorten_decoder); +#endif //CONFIG_SHORTEN_DECODER +#ifdef CONFIG_ALAC_DECODER + register_avcodec(&alac_decoder); +#endif //CONFIG_ALAC_DECODER +#ifdef CONFIG_WS_SND1_DECODER + register_avcodec(&ws_snd1_decoder); +#endif //CONFIG_WS_SND1_DECODER +#ifdef CONFIG_VORBIS_DECODER + register_avcodec(&vorbis_decoder); +#endif +#ifdef CONFIG_LIBGSM + register_avcodec(&libgsm_decoder); +#endif //CONFIG_LIBGSM +#ifdef CONFIG_QDM2_DECODER + register_avcodec(&qdm2_decoder); +#endif //CONFIG_QDM2_DECODER +#ifdef CONFIG_COOK_DECODER + register_avcodec(&cook_decoder); +#endif //CONFIG_COOK_DECODER +#ifdef CONFIG_TRUESPEECH_DECODER + register_avcodec(&truespeech_decoder); +#endif //CONFIG_TRUESPEECH_DECODER +#ifdef CONFIG_TTA_DECODER + register_avcodec(&tta_decoder); +#endif //CONFIG_TTA_DECODER +#ifdef CONFIG_AVS_DECODER + register_avcodec(&avs_decoder); +#endif //CONFIG_AVS_DECODER +#ifdef CONFIG_RAWVIDEO_DECODER + register_avcodec(&rawvideo_decoder); +#endif //CONFIG_RAWVIDEO_DECODER +#endif /* CONFIG_DECODERS */ + +#ifdef AMR_NB +#ifdef CONFIG_AMR_NB_DECODER + register_avcodec(&amr_nb_decoder); +#endif //CONFIG_AMR_NB_DECODER +#ifdef CONFIG_ENCODERS +#ifdef CONFIG_AMR_NB_ENCODER + register_avcodec(&amr_nb_encoder); +#endif //CONFIG_AMR_NB_ENCODER +#endif //CONFIG_ENCODERS +#endif /* AMR_NB */ + +#ifdef AMR_WB +#ifdef CONFIG_AMR_WB_DECODER + 
register_avcodec(&amr_wb_decoder); +#endif //CONFIG_AMR_WB_DECODER +#ifdef CONFIG_ENCODERS +#ifdef CONFIG_AMR_WB_ENCODER + register_avcodec(&amr_wb_encoder); +#endif //CONFIG_AMR_WB_ENCODER +#endif //CONFIG_ENCODERS +#endif /* AMR_WB */ + +#ifdef CONFIG_BMP_DECODER + register_avcodec(&bmp_decoder); +#endif + +#if CONFIG_MMVIDEO_DECODER + register_avcodec(&mmvideo_decoder); +#endif //CONFIG_MMVIDEO_DECODER + + /* pcm codecs */ +#if defined (CONFIG_ENCODERS) && defined (CONFIG_DECODERS) + #define PCM_CODEC(id, name) \ + register_avcodec(& name ## _encoder); \ + register_avcodec(& name ## _decoder); +#elif defined (CONFIG_ENCODERS) + #define PCM_CODEC(id, name) \ + register_avcodec(& name ## _encoder); +#elif defined (CONFIG_DECODERS) + #define PCM_CODEC(id, name) \ + register_avcodec(& name ## _decoder); +#endif + +PCM_CODEC(CODEC_ID_PCM_S32LE, pcm_s32le); +PCM_CODEC(CODEC_ID_PCM_S32BE, pcm_s32be); +PCM_CODEC(CODEC_ID_PCM_U32LE, pcm_u32le); +PCM_CODEC(CODEC_ID_PCM_U32BE, pcm_u32be); +PCM_CODEC(CODEC_ID_PCM_S24LE, pcm_s24le); +PCM_CODEC(CODEC_ID_PCM_S24BE, pcm_s24be); +PCM_CODEC(CODEC_ID_PCM_U24LE, pcm_u24le); +PCM_CODEC(CODEC_ID_PCM_U24BE, pcm_u24be); +PCM_CODEC(CODEC_ID_PCM_S24DAUD, pcm_s24daud); +PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le); +PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be); +PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le); +PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be); +PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8); +PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8); +PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw); +PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw); + + /* adpcm codecs */ +PCM_CODEC(CODEC_ID_ADPCM_IMA_QT, adpcm_ima_qt); +PCM_CODEC(CODEC_ID_ADPCM_IMA_WAV, adpcm_ima_wav); +PCM_CODEC(CODEC_ID_ADPCM_IMA_DK3, adpcm_ima_dk3); +PCM_CODEC(CODEC_ID_ADPCM_IMA_DK4, adpcm_ima_dk4); +PCM_CODEC(CODEC_ID_ADPCM_IMA_WS, adpcm_ima_ws); +PCM_CODEC(CODEC_ID_ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg); +PCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms); +PCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); +PCM_CODEC(CODEC_ID_ADPCM_XA, 
adpcm_xa); +PCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); +PCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); +PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726); +PCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); +PCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); +PCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2); +#undef PCM_CODEC + + /* subtitles */ +#ifdef CONFIG_DVDSUB_DECODER + register_avcodec(&dvdsub_decoder); +#endif +#ifdef CONFIG_DVDSUB_ENCODER + register_avcodec(&dvdsub_encoder); +#endif + +#ifdef CONFIG_DVBSUB_DECODER + register_avcodec(&dvbsub_decoder); +#endif +#ifdef CONFIG_DVBSUB_ENCODER + register_avcodec(&dvbsub_encoder); +#endif + + /* parsers */ + av_register_codec_parser(&mpegvideo_parser); + av_register_codec_parser(&mpeg4video_parser); +#if defined(CONFIG_H261_DECODER) || defined(CONFIG_H261_ENCODER) + av_register_codec_parser(&h261_parser); +#endif + av_register_codec_parser(&h263_parser); +#ifdef CONFIG_H264_DECODER + av_register_codec_parser(&h264_parser); +#endif + av_register_codec_parser(&mjpeg_parser); + av_register_codec_parser(&pnm_parser); + + av_register_codec_parser(&mpegaudio_parser); + av_register_codec_parser(&ac3_parser); + +#ifdef CONFIG_DVDSUB_DECODER + av_register_codec_parser(&dvdsub_parser); +#endif +#ifdef CONFIG_DVBSUB_DECODER + av_register_codec_parser(&dvbsub_parser); +#endif + av_register_codec_parser(&aac_parser); +} + diff --git a/mpeg4/src/libavcodec/alpha/asm.h b/mpeg4/src/libavcodec/alpha/asm.h new file mode 100644 index 0000000000000000000000000000000000000000..056e043f30f69483c3d1c7cce082246a98be835f --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/asm.h @@ -0,0 +1,189 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the 
Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef LIBAVCODEC_ALPHA_ASM_H +#define LIBAVCODEC_ALPHA_ASM_H + +#include + +#if defined __GNUC__ +# define GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define GNUC_PREREQ(maj, min) 0 +#endif + +#if GNUC_PREREQ(2,96) +# define likely(x) __builtin_expect((x) != 0, 1) +# define unlikely(x) __builtin_expect((x) != 0, 0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + +#define AMASK_BWX (1 << 0) +#define AMASK_FIX (1 << 1) +#define AMASK_CIX (1 << 2) +#define AMASK_MVI (1 << 8) + +static inline uint64_t BYTE_VEC(uint64_t x) +{ + x |= x << 8; + x |= x << 16; + x |= x << 32; + return x; +} +static inline uint64_t WORD_VEC(uint64_t x) +{ + x |= x << 16; + x |= x << 32; + return x; +} + +#define sextw(x) ((int16_t) (x)) + +#ifdef __GNUC__ +#define ldq(p) \ + (((union { \ + uint64_t __l; \ + __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \ + } *) (p))->__l) +#define ldl(p) \ + (((union { \ + int32_t __l; \ + __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \ + } *) (p))->__l) +#define stq(l, p) \ + do { \ + (((union { \ + uint64_t __l; \ + __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \ + } *) (p))->__l) = l; \ + } while (0) +#define stl(l, p) \ + do { \ + (((union { \ + int32_t __l; \ + __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \ + } *) (p))->__l) = l; \ + } while (0) +struct 
unaligned_long { uint64_t l; } __attribute__((packed)); +#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) +#define uldq(a) (((const struct unaligned_long *) (a))->l) + +#if GNUC_PREREQ(3,3) +#define prefetch(p) __builtin_prefetch((p), 0, 1) +#define prefetch_en(p) __builtin_prefetch((p), 0, 0) +#define prefetch_m(p) __builtin_prefetch((p), 1, 1) +#define prefetch_men(p) __builtin_prefetch((p), 1, 0) +#define cmpbge __builtin_alpha_cmpbge +/* Avoid warnings. */ +#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) +#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) +#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) +#define zap __builtin_alpha_zap +#define zapnot __builtin_alpha_zapnot +#define amask __builtin_alpha_amask +#define implver __builtin_alpha_implver +#define rpcc __builtin_alpha_rpcc +#else +#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) +#define implver() ({ uint64_t __r; asm ("implver %0" : 
"=r" (__r)); __r; }) +#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) +#endif +#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") + +#if GNUC_PREREQ(3,3) && defined(__alpha_max__) +#define minub8 __builtin_alpha_minub8 +#define minsb8 __builtin_alpha_minsb8 +#define minuw4 __builtin_alpha_minuw4 +#define minsw4 __builtin_alpha_minsw4 +#define maxub8 __builtin_alpha_maxub8 +#define maxsb8 __builtin_alpha_maxsb8 +#define maxuw4 __builtin_alpha_maxuw4 +#define maxsw4 __builtin_alpha_maxsw4 +#define perr __builtin_alpha_perr +#define pklb __builtin_alpha_pklb +#define pkwb __builtin_alpha_pkwb +#define unpkbl __builtin_alpha_unpkbl +#define unpkbw __builtin_alpha_unpkbw +#else +#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) +#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define 
unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#endif + +#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ + +#include +#define ldq(p) (*(const uint64_t *) (p)) +#define ldl(p) (*(const int32_t *) (p)) +#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0) +#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0) +#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) +#define uldq(a) (*(const __unaligned uint64_t *) (a)) +#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) +#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) +#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) +#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) +#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) +#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) +#define amask(a) asm ("amask %a0,%v0", a) +#define implver() asm ("implver %v0") +#define rpcc() asm ("rpcc %v0") +#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) +#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) +#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) +#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) +#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) +#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) +#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) +#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) +#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) +#define pklb(a) asm ("pklb %a0,%v0", a) +#define pkwb(a) asm ("pkwb %a0,%v0", a) +#define unpkbl(a) asm ("unpkbl %a0,%v0", a) +#define unpkbw(a) asm ("unpkbw %a0,%v0", a) +#define wh64(a) asm ("wh64 %a0", a) + +#else +#error "Unknown compiler!" 
+#endif + +#endif /* LIBAVCODEC_ALPHA_ASM_H */ diff --git a/mpeg4/src/libavcodec/alpha/dsputil_alpha.c b/mpeg4/src/libavcodec/alpha/dsputil_alpha.c new file mode 100644 index 0000000000000000000000000000000000000000..299a25dc43b94b5cdef462547a7384c6c3d6f2dc --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/dsputil_alpha.c @@ -0,0 +1,360 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.h" +#include "../dsputil.h" + +extern void simple_idct_axp(DCTELEM *block); +extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block); +extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block); + +void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, + int line_size, int h); +void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, + int line_size); +void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, + int line_size); +void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); +void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); + +void get_pixels_mvi(DCTELEM *restrict block, + const uint8_t *restrict pixels, int line_size); +void 
diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, + int stride); +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); + +#if 0 +/* These functions were the base for the optimized assembler routines, + and remain here for documentation purposes. */ +static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, + int line_size) +{ + int i = 8; + uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ + + do { + uint64_t shorts0, shorts1; + + shorts0 = ldq(block); + shorts0 = maxsw4(shorts0, 0); + shorts0 = minsw4(shorts0, clampmask); + stl(pkwb(shorts0), pixels); + + shorts1 = ldq(block + 4); + shorts1 = maxsw4(shorts1, 0); + shorts1 = minsw4(shorts1, clampmask); + stl(pkwb(shorts1), pixels + 4); + + pixels += line_size; + block += 8; + } while (--i); +} + +void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, + int line_size) +{ + int h = 8; + /* Keep this function a leaf function by generating the constants + manually (mainly for the hack value ;-). */ + uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */ + uint64_t signmask = zap(-1, 0x33); + signmask ^= signmask >> 1; /* 0x8000800080008000 */ + + do { + uint64_t shorts0, pix0, signs0; + uint64_t shorts1, pix1, signs1; + + shorts0 = ldq(block); + shorts1 = ldq(block + 4); + + pix0 = unpkbw(ldl(pixels)); + /* Signed subword add (MMX paddw). */ + signs0 = shorts0 & signmask; + shorts0 &= ~signmask; + shorts0 += pix0; + shorts0 ^= signs0; + /* Clamp. */ + shorts0 = maxsw4(shorts0, 0); + shorts0 = minsw4(shorts0, clampmask); + + /* Next 4. 
*/ + pix1 = unpkbw(ldl(pixels + 4)); + signs1 = shorts1 & signmask; + shorts1 &= ~signmask; + shorts1 += pix1; + shorts1 ^= signs1; + shorts1 = maxsw4(shorts1, 0); + shorts1 = minsw4(shorts1, clampmask); + + stl(pkwb(shorts0), pixels); + stl(pkwb(shorts1), pixels + 4); + + pixels += line_size; + block += 8; + } while (--h); +} +#endif +/* Zero sizeof(DCTELEM) * 6 * 64 bytes starting at blocks, written as eight 64-bit stores (64 bytes) per loop pass. */ +static void clear_blocks_axp(DCTELEM *blocks) { + uint64_t *p = (uint64_t *) blocks; + int n = sizeof(DCTELEM) * 6 * 64; + + do { + p[0] = 0; + p[1] = 0; + p[2] = 0; + p[3] = 0; + p[4] = 0; + p[5] = 0; + p[6] = 0; + p[7] = 0; + p += 8; + n -= 8 * 8; + } while (n); +} +/* Truncating average: (a & b) + ((a ^ b) >> 1), where the 0xfe mask drops each byte's low bit before the shift. Presumably BYTE_VEC(x) (asm.h) replicates x into all eight bytes, making this a per-byte average - TODO confirm. */ +static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) +{ + return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); +} +/* Round-up counterpart of avg2_no_rnd: (a | b) - ((a ^ b) >> 1), with the same 0xfe masking before the shift. */ +static inline uint64_t avg2(uint64_t a, uint64_t b) +{ + return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); +} + +#if 0 +/* The XY2 routines basically utilize this scheme, but reuse parts in + each iteration. */ +static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) +{ + uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) + + ((l2 & ~BYTE_VEC(0x03)) >> 2) + + ((l3 & ~BYTE_VEC(0x03)) >> 2) + + ((l4 & ~BYTE_VEC(0x03)) >> 2); + uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) + + (l2 & BYTE_VEC(0x03)) + + (l3 & BYTE_VEC(0x03)) + + (l4 & BYTE_VEC(0x03)) + + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); + return r1 + r2; +} +#endif + +#define OP(LOAD, STORE) \ + do { \ + STORE(LOAD(pixels), block); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) + +#define OP_X2(LOAD, STORE) \ + do { \ + uint64_t pix1, pix2; \ + \ + pix1 = LOAD(pixels); \ + pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + STORE(AVG2(pix1, pix2), block); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) + +#define OP_Y2(LOAD, STORE) \ + do { \ + uint64_t pix = LOAD(pixels); \ + do { \ + uint64_t next_pix; \ + \ + pixels += line_size; \ + next_pix = LOAD(pixels); \ + STORE(AVG2(pix, next_pix), block); \ + block += line_size; \ + pix = next_pix;
\ + } while (--h); \ + } while (0) + +#define OP_XY2(LOAD, STORE) \ + do { \ + uint64_t pix1 = LOAD(pixels); \ + uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \ + + (pix2 & BYTE_VEC(0x03)); \ + uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \ + + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \ + \ + do { \ + uint64_t npix1, npix2; \ + uint64_t npix_l, npix_h; \ + uint64_t avg; \ + \ + pixels += line_size; \ + npix1 = LOAD(pixels); \ + npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + npix_l = (npix1 & BYTE_VEC(0x03)) \ + + (npix2 & BYTE_VEC(0x03)); \ + npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \ + + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \ + avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \ + + pix_h + npix_h; \ + STORE(avg, block); \ + \ + block += line_size; \ + pix_l = npix_l; \ + pix_h = npix_h; \ + } while (--h); \ + } while (0) + +#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \ +static void OPNAME ## _pixels ## SUFF ## _axp \ + (uint8_t *restrict block, const uint8_t *restrict pixels, \ + int line_size, int h) \ +{ \ + if ((size_t) pixels & 0x7) { \ + OPKIND(uldq, STORE); \ + } else { \ + OPKIND(ldq, STORE); \ + } \ +} \ + \ +static void OPNAME ## _pixels16 ## SUFF ## _axp \ + (uint8_t *restrict block, const uint8_t *restrict pixels, \ + int line_size, int h) \ +{ \ + OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \ + OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \ +} + +#define PIXOP(OPNAME, STORE) \ + MAKE_OP(OPNAME, , OP, STORE) \ + MAKE_OP(OPNAME, _x2, OP_X2, STORE) \ + MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \ + MAKE_OP(OPNAME, _xy2, OP_XY2, STORE) + +/* Rounding primitives. */ +#define AVG2 avg2 +#define AVG4 avg4 +#define AVG4_ROUNDER BYTE_VEC(0x02) +#define STORE(l, b) stq(l, b) +PIXOP(put, STORE); + +#undef STORE +#define STORE(l, b) stq(AVG2(l, ldq(b)), b); +PIXOP(avg, STORE); + +/* Not rounding primitives. 
*/ +#undef AVG2 +#undef AVG4 +#undef AVG4_ROUNDER +#undef STORE +#define AVG2 avg2_no_rnd +#define AVG4 avg4_no_rnd +#define AVG4_ROUNDER BYTE_VEC(0x01) +#define STORE(l, b) stq(l, b) +PIXOP(put_no_rnd, STORE); + +#undef STORE +#define STORE(l, b) stq(AVG2(l, ldq(b)), b); +PIXOP(avg_no_rnd, STORE); +/* 16-byte-wide put: run put_pixels_axp_asm on the left and then the right 8-byte half of each row. */ +void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, + int line_size, int h) +{ + put_pixels_axp_asm(block, pixels, line_size, h); + put_pixels_axp_asm(block + 8, pixels + 8, line_size, h); +} +/* Adapter around pix_abs16x16_mvi_asm with the same five-argument shape as pix_abs8x8_mvi and the other routines stored beside it in c->sad[] and c->pix_abs[][]. s and h are ignored: the assembly routine takes no row count. */ +static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride, int h) +{ + return pix_abs16x16_mvi_asm(a, b, stride); +} +/* Fill c with the Alpha implementations declared or defined in this file; avctx is not referenced. */ +void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) +{ + c->put_pixels_tab[0][0] = put_pixels16_axp_asm; + c->put_pixels_tab[0][1] = put_pixels16_x2_axp; + c->put_pixels_tab[0][2] = put_pixels16_y2_axp; + c->put_pixels_tab[0][3] = put_pixels16_xy2_axp; + + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp; + + c->avg_pixels_tab[0][0] = avg_pixels16_axp; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp; + + c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp; + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp; + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp; + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp; + + c->put_pixels_tab[1][0] = put_pixels_axp_asm; + c->put_pixels_tab[1][1] = put_pixels_x2_axp; + c->put_pixels_tab[1][2] = put_pixels_y2_axp; + c->put_pixels_tab[1][3] = put_pixels_xy2_axp; + + c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp; + c->put_no_rnd_pixels_tab[1][3] =
put_no_rnd_pixels_xy2_axp; + + c->avg_pixels_tab[1][0] = avg_pixels_axp; + c->avg_pixels_tab[1][1] = avg_pixels_x2_axp; + c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; + c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; + + c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp; + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp; + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp; + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp; + + c->clear_blocks = clear_blocks_axp; + + /* amask clears all bits that correspond to present features. */ + if (amask(AMASK_MVI) == 0) { + c->put_pixels_clamped = put_pixels_clamped_mvi_asm; + c->add_pixels_clamped = add_pixels_clamped_mvi_asm; + + c->get_pixels = get_pixels_mvi; + c->diff_pixels = diff_pixels_mvi; + c->sad[0] = sad16x16_mvi; + c->sad[1] = pix_abs8x8_mvi; +// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed + c->pix_abs[0][0] = sad16x16_mvi; + c->pix_abs[1][0] = pix_abs8x8_mvi; + c->pix_abs[0][1] = pix_abs16x16_x2_mvi; + c->pix_abs[0][2] = pix_abs16x16_y2_mvi; + c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; + } + + put_pixels_clamped_axp_p = c->put_pixels_clamped; + add_pixels_clamped_axp_p = c->add_pixels_clamped; + + c->idct_put = simple_idct_put_axp; + c->idct_add = simple_idct_add_axp; + c->idct = simple_idct_axp; +} diff --git a/mpeg4/src/libavcodec/alpha/dsputil_alpha_asm.S b/mpeg4/src/libavcodec/alpha/dsputil_alpha_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..29ba9dc024c665f6c3199ba906cfe1f1409a42cb --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/dsputil_alpha_asm.S @@ -0,0 +1,283 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * These functions are scheduled for pca56. They should work + * reasonably on ev6, though. + */ + +#include "regdef.h" + +/* Some nicer register names. */ +#define ta t10 +#define tb t11 +#define tc t12 +#define td AT +/* Danger: these overlap with the argument list and the return value */ +#define te a5 +#define tf a4 +#define tg a3 +#define th v0 + + .set noat + .set noreorder + .arch pca56 + .text + +/************************************************************************ + * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, + * int line_size, int h) + */ + .align 6 + .globl put_pixels_axp_asm + .ent put_pixels_axp_asm +put_pixels_axp_asm: + .frame sp, 0, ra + .prologue 0 + +#ifdef HAVE_GPROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + and a1, 7, t0 + beq t0, $aligned + + .align 4 +$unaligned: + ldq_u t0, 0(a1) + ldq_u t1, 8(a1) + addq a1, a2, a1 + nop + + ldq_u t2, 0(a1) + ldq_u t3, 8(a1) + addq a1, a2, a1 + nop + + ldq_u t4, 0(a1) + ldq_u t5, 8(a1) + addq a1, a2, a1 + nop + + ldq_u t6, 0(a1) + ldq_u t7, 8(a1) + extql t0, a1, t0 + addq a1, a2, a1 + + extqh t1, a1, t1 + addq a0, a2, t8 + extql t2, a1, t2 + addq t8, a2, t9 + + extqh t3, a1, t3 + addq t9, a2, ta + extql t4, a1, t4 + or t0, t1, t0 + + extqh t5, a1, t5 + or t2, t3, t2 + extql t6, a1, t6 + or t4, t5, t4 + + extqh t7, a1, t7 + or t6, t7, t6 + stq t0, 0(a0) + stq t2, 0(t8) + + stq t4, 0(t9) + subq a3, 4, a3 + stq t6, 0(ta) + addq ta, a2, a0 + + bne a3, $unaligned + ret + + .align 4 
+$aligned: + ldq t0, 0(a1) + addq a1, a2, a1 + ldq t1, 0(a1) + addq a1, a2, a1 + + ldq t2, 0(a1) + addq a1, a2, a1 + ldq t3, 0(a1) + + addq a0, a2, t4 + addq a1, a2, a1 + addq t4, a2, t5 + subq a3, 4, a3 + + stq t0, 0(a0) + addq t5, a2, t6 + stq t1, 0(t4) + addq t6, a2, a0 + + stq t2, 0(t5) + stq t3, 0(t6) + + bne a3, $aligned + ret + .end put_pixels_axp_asm + +/************************************************************************ + * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, + * int line_size) + */ + .align 6 + .globl put_pixels_clamped_mvi_asm + .ent put_pixels_clamped_mvi_asm +put_pixels_clamped_mvi_asm: + .frame sp, 0, ra + .prologue 0 + +#ifdef HAVE_GPROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + lda t8, -1 + lda t9, 8 # loop counter + zap t8, 0xaa, t8 # 00ff00ff00ff00ff + + .align 4 +1: ldq t0, 0(a0) + ldq t1, 8(a0) + ldq t2, 16(a0) + ldq t3, 24(a0) + + maxsw4 t0, zero, t0 + subq t9, 2, t9 + maxsw4 t1, zero, t1 + lda a0, 32(a0) + + maxsw4 t2, zero, t2 + addq a1, a2, ta + maxsw4 t3, zero, t3 + minsw4 t0, t8, t0 + + minsw4 t1, t8, t1 + minsw4 t2, t8, t2 + minsw4 t3, t8, t3 + pkwb t0, t0 + + pkwb t1, t1 + pkwb t2, t2 + pkwb t3, t3 + stl t0, 0(a1) + + stl t1, 4(a1) + addq ta, a2, a1 + stl t2, 0(ta) + stl t3, 4(ta) + + bne t9, 1b + ret + .end put_pixels_clamped_mvi_asm + +/************************************************************************ + * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, + * int line_size) + */ + .align 6 + .globl add_pixels_clamped_mvi_asm + .ent add_pixels_clamped_mvi_asm +add_pixels_clamped_mvi_asm: + .frame sp, 0, ra + .prologue 0 + +#ifdef HAVE_GPROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + lda t1, -1 + lda th, 8 + zap t1, 0x33, tg + nop + + srl tg, 1, t0 + xor tg, t0, tg # 0x8000800080008000 + zap t1, 0xaa, tf # 0x00ff00ff00ff00ff + + .align 4 +1: ldl t1, 0(a1) # pix0 (try to hit cache line soon) + ldl t4, 4(a1) # pix1 + addq a1, a2, te # pixels += 
line_size + ldq t0, 0(a0) # shorts0 + + ldl t7, 0(te) # pix2 (try to hit cache line soon) + ldl ta, 4(te) # pix3 + ldq t3, 8(a0) # shorts1 + ldq t6, 16(a0) # shorts2 + + ldq t9, 24(a0) # shorts3 + unpkbw t1, t1 # 0 0 (quarter/op no.) + and t0, tg, t2 # 0 1 + unpkbw t4, t4 # 1 0 + + bic t0, tg, t0 # 0 2 + unpkbw t7, t7 # 2 0 + and t3, tg, t5 # 1 1 + addq t0, t1, t0 # 0 3 + + xor t0, t2, t0 # 0 4 + unpkbw ta, ta # 3 0 + and t6, tg, t8 # 2 1 + maxsw4 t0, zero, t0 # 0 5 + + bic t3, tg, t3 # 1 2 + bic t6, tg, t6 # 2 2 + minsw4 t0, tf, t0 # 0 6 + addq t3, t4, t3 # 1 3 + + pkwb t0, t0 # 0 7 + xor t3, t5, t3 # 1 4 + maxsw4 t3, zero, t3 # 1 5 + addq t6, t7, t6 # 2 3 + + xor t6, t8, t6 # 2 4 + and t9, tg, tb # 3 1 + minsw4 t3, tf, t3 # 1 6 + bic t9, tg, t9 # 3 2 + + maxsw4 t6, zero, t6 # 2 5 + addq t9, ta, t9 # 3 3 + stl t0, 0(a1) # 0 8 + minsw4 t6, tf, t6 # 2 6 + + xor t9, tb, t9 # 3 4 + maxsw4 t9, zero, t9 # 3 5 + lda a0, 32(a0) # block += 16; + pkwb t3, t3 # 1 7 + + minsw4 t9, tf, t9 # 3 6 + subq th, 2, th + pkwb t6, t6 # 2 7 + pkwb t9, t9 # 3 7 + + stl t3, 4(a1) # 1 8 + addq te, a2, a1 # pixels += line_size + stl t6, 0(te) # 2 8 + stl t9, 4(te) # 3 8 + + bne th, 1b + ret + .end add_pixels_clamped_mvi_asm diff --git a/mpeg4/src/libavcodec/alpha/motion_est_alpha.c b/mpeg4/src/libavcodec/alpha/motion_est_alpha.c new file mode 100644 index 0000000000000000000000000000000000000000..ea8580be75e9e671fec58a1198f49975a7de9001 --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/motion_est_alpha.c @@ -0,0 +1,343 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.h" +#include "../dsputil.h" +/* Widen an 8-row block: each row is one ldq of 8 bytes from pixels, expanded with unpkbw and stored as two quadwords at block and block + 4. pixels advances by line_size and block by 8 per row. */ +void get_pixels_mvi(DCTELEM *restrict block, + const uint8_t *restrict pixels, int line_size) +{ + int h = 8; + + do { + uint64_t p; + + p = ldq(pixels); + stq(unpkbw(p), block); + stq(unpkbw(p >> 32), block + 4); + + pixels += line_size; + block += 8; + } while (--h); +} +/* For 8 rows, subtract a quadword of s2 from a quadword of s1 byte by byte and store the widened result in block (8 entries per row). Where an s1 byte is below the s2 byte (cmpbge bit clear), 4 * a adds 0x100 at that byte to repay the borrow taken from the next byte, and 0xff is merged into the upper byte of the widened word. */ +void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, + int stride) { + int h = 8; + uint64_t mask = 0x4040; + + mask |= mask << 16; + mask |= mask << 32; + do { + uint64_t x, y, c, d, a; + uint64_t signs; + + x = ldq(s1); + y = ldq(s2); + c = cmpbge(x, y); + d = x - y; + a = zap(mask, c); /* We use 0x4040404040404040 here... */ + d += 4 * a; /* ...so we can use s4addq here.
*/ + signs = zap(-1, c); + + stq(unpkbw(d) | (unpkbw(signs) << 8), block); + stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4); + + s1 += stride; + s2 += stride; + block += 8; + } while (--h); +} +/* Round-up average: (a | b) - ((a ^ b) >> 1), with 0xfe masking in every byte so the shift cannot pull a bit in from the neighbouring byte (presumably BYTE_VEC replicates its byte argument across the quadword - TODO confirm in asm.h). */ +static inline uint64_t avg2(uint64_t a, uint64_t b) +{ + return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); +} +/* Rounded average of four packed quadwords: r1 adds the inputs with the two low bits of each byte cleared and shifted right by 2; r2 adds those low bits plus the 0x02 rounding term and shifts the sum right by 2. */ +static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) +{ + uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) + + ((l2 & ~BYTE_VEC(0x03)) >> 2) + + ((l3 & ~BYTE_VEC(0x03)) >> 2) + + ((l4 & ~BYTE_VEC(0x03)) >> 2); + uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) + + (l2 & BYTE_VEC(0x03)) + + (l3 & BYTE_VEC(0x03)) + + (l4 & BYTE_VEC(0x03)) + + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); + return r1 + r2; +} +/* Accumulate perr(p1, p2) over h rows, one quadword (8 bytes) per row. pix1 is always read with ldq; pix2 is read with uldq when its address is not a multiple of 8. v is unused. The do/while loops run at least once, so h must be at least 1. */ +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) +{ + int result = 0; + + if ((size_t) pix2 & 0x7) { + /* works only when pix2 is actually unaligned */ + do { /* do 8 pixels at a time */ + uint64_t p1, p2; + + p1 = ldq(pix1); + p2 = uldq(pix2); + result += perr(p1, p2); + + pix1 += line_size; + pix2 += line_size; + } while (--h); + } else { + do { + uint64_t p1, p2; + + p1 = ldq(pix1); + p2 = ldq(pix2); + result += perr(p1, p2); + + pix1 += line_size; + pix2 += line_size; + } while (--h); + } + + return result; +} + +#if 0 /* now done in assembly */ +int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int result = 0; + int h = 16; + + if ((size_t) pix2 & 0x7) { + /* works only when pix2 is actually unaligned */ + do { /* do 16 pixels at a time */ + uint64_t p1_l, p1_r, p2_l, p2_r; + uint64_t t; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + t = ldq_u(pix2 + 8); + p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); + p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); + pix1 += line_size; + pix2 += line_size; + + result += perr(p1_l, p2_l) + + perr(p1_r, p2_r); + } while (--h); + } else { + do { + uint64_t p1_l, p1_r, p2_l, p2_r; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + p2_l =
ldq(pix2); + p2_r = ldq(pix2 + 8); + pix1 += line_size; + pix2 += line_size; + + result += perr(p1_l, p2_l) + + perr(p1_r, p2_r); + } while (--h); + } + + return result; +} +#endif +/* Sum perr() over h rows of 16 bytes between pix1 and the avg2() of pix2 with pix2 advanced by one byte, i.e. every pix2 byte is averaged with its right-hand neighbour first. The switch picks the loading strategy from the low three address bits of pix2: 0 uses ldq, 7 and all other values use ldq_u with extql/extqh. v is unused; the do/while loops require h to be at least 1. */ +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) +{ + int result = 0; + uint64_t disalign = (size_t) pix2 & 0x7; + + switch (disalign) { + case 0: + do { + uint64_t p1_l, p1_r, p2_l, p2_r; + uint64_t l, r; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + l = ldq(pix2); + r = ldq(pix2 + 8); + p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56)); + p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56)); + pix1 += line_size; + pix2 += line_size; + + result += perr(p1_l, p2_l) + + perr(p1_r, p2_r); + } while (--h); + break; + case 7: + /* |.......l|lllllllr|rrrrrrr*| + This case is special because disalign1 would be 8, which + gets treated as 0 by extqh. At least it is a bit faster + that way :) */ + do { + uint64_t p1_l, p1_r, p2_l, p2_r; + uint64_t l, m, r; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + l = ldq_u(pix2); + m = ldq_u(pix2 + 8); + r = ldq_u(pix2 + 16); + p2_l = avg2(extql(l, disalign) | extqh(m, disalign), m); + p2_r = avg2(extql(m, disalign) | extqh(r, disalign), r); + pix1 += line_size; + pix2 += line_size; + + result += perr(p1_l, p2_l) + + perr(p1_r, p2_r); + } while (--h); + break; + default: + do { + uint64_t disalign1 = disalign + 1; + uint64_t p1_l, p1_r, p2_l, p2_r; + uint64_t l, m, r; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + l = ldq_u(pix2); + m = ldq_u(pix2 + 8); + r = ldq_u(pix2 + 16); + p2_l = avg2(extql(l, disalign) | extqh(m, disalign), + extql(l, disalign1) | extqh(m, disalign1)); + p2_r = avg2(extql(m, disalign) | extqh(r, disalign), + extql(m, disalign1) | extqh(r, disalign1)); + pix1 += line_size; + pix2 += line_size; + + result += perr(p1_l, p2_l) + + perr(p1_r, p2_r); + } while (--h); + break; + } + return result; +} + +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) +{ + int
result = 0; + + if ((size_t) pix2 & 0x7) { + uint64_t t, p2_l, p2_r; + t = ldq_u(pix2 + 8); + p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); + p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); + + do { + uint64_t p1_l, p1_r, np2_l, np2_r; + uint64_t t; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + pix2 += line_size; + t = ldq_u(pix2 + 8); + np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2); + np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2); + + result += perr(p1_l, avg2(p2_l, np2_l)) + + perr(p1_r, avg2(p2_r, np2_r)); + + pix1 += line_size; + p2_l = np2_l; + p2_r = np2_r; + + } while (--h); + } else { + uint64_t p2_l, p2_r; + p2_l = ldq(pix2); + p2_r = ldq(pix2 + 8); + do { + uint64_t p1_l, p1_r, np2_l, np2_r; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + pix2 += line_size; + np2_l = ldq(pix2); + np2_r = ldq(pix2 + 8); + + result += perr(p1_l, avg2(p2_l, np2_l)) + + perr(p1_r, avg2(p2_r, np2_r)); + + pix1 += line_size; + p2_l = np2_l; + p2_r = np2_r; + } while (--h); + } + return result; +} + +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) +{ + int result = 0; + + uint64_t p1_l, p1_r; + uint64_t p2_l, p2_r, p2_x; + + p1_l = ldq(pix1); + p1_r = ldq(pix1 + 8); + + if ((size_t) pix2 & 0x7) { /* could be optimized a lot */ + p2_l = uldq(pix2); + p2_r = uldq(pix2 + 8); + p2_x = (uint64_t) pix2[16] << 56; + } else { + p2_l = ldq(pix2); + p2_r = ldq(pix2 + 8); + p2_x = ldq(pix2 + 16) << 56; + } + + do { + uint64_t np1_l, np1_r; + uint64_t np2_l, np2_r, np2_x; + + pix1 += line_size; + pix2 += line_size; + + np1_l = ldq(pix1); + np1_r = ldq(pix1 + 8); + + if ((size_t) pix2 & 0x7) { /* could be optimized a lot */ + np2_l = uldq(pix2); + np2_r = uldq(pix2 + 8); + np2_x = (uint64_t) pix2[16] << 56; + } else { + np2_l = ldq(pix2); + np2_r = ldq(pix2 + 8); + np2_x = ldq(pix2 + 16) << 56; + } + + result += perr(p1_l, + avg4( p2_l, ( p2_l >> 8) | ((uint64_t) p2_r << 56), + np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56))) + 
+ perr(p1_r, + avg4( p2_r, ( p2_r >> 8) | ((uint64_t) p2_x), + np2_r, (np2_r >> 8) | ((uint64_t) np2_x))); + + p1_l = np1_l; + p1_r = np1_r; + p2_l = np2_l; + p2_r = np2_r; + p2_x = np2_x; + } while (--h); + + return result; +} diff --git a/mpeg4/src/libavcodec/alpha/motion_est_mvi_asm.S b/mpeg4/src/libavcodec/alpha/motion_est_mvi_asm.S new file mode 100644 index 0000000000000000000000000000000000000000..e043f43715232606c5b42e02f6bf86fbc2e932d4 --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/motion_est_mvi_asm.S @@ -0,0 +1,183 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "regdef.h" + +/* Some nicer register names. */ +#define ta t10 +#define tb t11 +#define tc t12 +#define td AT +/* Danger: these overlap with the argument list and the return value */ +#define te a5 +#define tf a4 +#define tg a3 +#define th v0 + + .set noat + .set noreorder + .arch pca56 + .text + +/***************************************************************************** + * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size) + * + * This code is written with a pca56 in mind. For ev6, one should + * really take the increased latency of 3 cycles for MVI instructions + * into account. 
+ * + * It is important to keep the loading and first use of a register as + * far apart as possible, because if a register is accessed before it + * has been fetched from memory, the CPU will stall. + */ + .align 4 + .globl pix_abs16x16_mvi_asm + .ent pix_abs16x16_mvi_asm +pix_abs16x16_mvi_asm: + .frame sp, 0, ra, 0 + .prologue 0 + +#ifdef HAVE_GPROF + lda AT, _mcount + jsr AT, (AT), _mcount +#endif + + and a1, 7, t0 + clr v0 + lda a3, 16 + beq t0, $aligned + .align 4 +$unaligned: + /* Registers: + line 0: + t0: left_u -> left lo -> left + t1: mid + t2: right_u -> right hi -> right + t3: ref left + t4: ref right + line 1: + t5: left_u -> left lo -> left + t6: mid + t7: right_u -> right hi -> right + t8: ref left + t9: ref right + temp: + ta: left hi + tb: right lo + tc: error left + td: error right */ + + /* load line 0 */ + ldq_u t0, 0(a1) # left_u + ldq_u t1, 8(a1) # mid + ldq_u t2, 16(a1) # right_u + ldq t3, 0(a0) # ref left + ldq t4, 8(a0) # ref right + addq a0, a2, a0 # pix1 + addq a1, a2, a1 # pix2 + /* load line 1 */ + ldq_u t5, 0(a1) # left_u + ldq_u t6, 8(a1) # mid + ldq_u t7, 16(a1) # right_u + ldq t8, 0(a0) # ref left + ldq t9, 8(a0) # ref right + addq a0, a2, a0 # pix1 + addq a1, a2, a1 # pix2 + /* calc line 0 */ + extql t0, a1, t0 # left lo + extqh t1, a1, ta # left hi + extql t1, a1, tb # right lo + or t0, ta, t0 # left + extqh t2, a1, t2 # right hi + perr t3, t0, tc # error left + or t2, tb, t2 # right + perr t4, t2, td # error right + addq v0, tc, v0 # add error left + addq v0, td, v0 # add error right + /* calc line 1 */ + extql t5, a1, t5 # left lo + extqh t6, a1, ta # left hi + extql t6, a1, tb # right lo + or t5, ta, t5 # left + extqh t7, a1, t7 # right hi + perr t8, t5, tc # error left + or t7, tb, t7 # right + perr t9, t7, td # error right + addq v0, tc, v0 # add error left + addq v0, td, v0 # add error right + /* loop */ + subq a3, 2, a3 # h -= 2 + bne a3, $unaligned + ret + + .align 4 +$aligned: + /* load line 0 */ + ldq t0, 0(a1) # left +
ldq t1, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq t2, 0(a0) # ref left + ldq t3, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 1 */ + ldq t4, 0(a1) # left + ldq t5, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq t6, 0(a0) # ref left + ldq t7, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 2 */ + ldq t8, 0(a1) # left + ldq t9, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq ta, 0(a0) # ref left + ldq tb, 8(a0) # ref right + addq a0, a2, a0 # pix1 + /* load line 3 */ + ldq tc, 0(a1) # left + ldq td, 8(a1) # right + addq a1, a2, a1 # pix2 + ldq te, 0(a0) # ref left + ldq tf, 8(a0) # ref right + /* calc line 0 */ + perr t0, t2, t0 # error left + addq a0, a2, a0 # pix1 + perr t1, t3, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 1 */ + perr t4, t6, t0 # error left + addq v0, t1, v0 # add error right + perr t5, t7, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 2 */ + perr t8, ta, t0 # error left + addq v0, t1, v0 # add error right + perr t9, tb, t1 # error right + addq v0, t0, v0 # add error left + /* calc line 3 */ + perr tc, te, t0 # error left + addq v0, t1, v0 # add error right + perr td, tf, t1 # error right + addq v0, t0, v0 # add error left + addq v0, t1, v0 # add error right + /* loop */ + subq a3, 4, a3 # h -= 4 + bne a3, $aligned + ret + .end pix_abs16x16_mvi_asm diff --git a/mpeg4/src/libavcodec/alpha/mpegvideo_alpha.c b/mpeg4/src/libavcodec/alpha/mpegvideo_alpha.c new file mode 100644 index 0000000000000000000000000000000000000000..4c512451e0335db4504b0f588571c8dd36795915 --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/mpegvideo_alpha.c @@ -0,0 +1,145 @@ +/* + * Alpha optimized DSP utils + * Copyright (c) 2002 Falk Hueffner + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.h" +#include "../dsputil.h" +#include "../mpegvideo.h" + +static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, + int n, int qscale) +{ + int i, n_coeffs; + uint64_t qmul, qadd; + uint64_t correction; + DCTELEM *orig_block = block; + DCTELEM block0; /* might not be used uninitialized */ + + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + + if (!s->h263_aic) { + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + } else { + qadd = 0; + } + n_coeffs = 63; // does not always use zigzag table + + for(i = 0; i <= n_coeffs; block += 4, i += 4) { + uint64_t levels, negmask, zeros, add; + + levels = ldq(block); + if (levels == 0) + continue; + +#ifdef __alpha_max__ + /* I don't think the speed difference justifies runtime + detection. */ + negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ + negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ +#else + negmask = cmpbge(WORD_VEC(0x7fff), levels); + negmask &= (negmask >> 1) | (1 << 7); + negmask = zap(-1, negmask); +#endif + + zeros = cmpbge(0, levels); + zeros &= zeros >> 1; + /* zeros |= zeros << 1 is not needed since qadd <= 255, so + zapping the lower byte suffices. */ + + levels *= qmul; + levels -= correction & (negmask << 16); + + /* Negate qadd for negative levels. 
*/ + add = qadd ^ negmask; + add += WORD_VEC(0x0001) & negmask; + /* Set qadd to 0 for levels == 0. */ + add = zap(add, zeros); + + levels += add; + + stq(levels, block); + } + + if (s->mb_intra && !s->h263_aic) + orig_block[0] = block0; +} + +static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, + int n, int qscale) +{ + int i, n_coeffs; + uint64_t qmul, qadd; + uint64_t correction; + + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + + n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + + for(i = 0; i <= n_coeffs; block += 4, i += 4) { + uint64_t levels, negmask, zeros, add; + + levels = ldq(block); + if (levels == 0) + continue; + +#ifdef __alpha_max__ + /* I don't think the speed difference justifies runtime + detection. */ + negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ + negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ +#else + negmask = cmpbge(WORD_VEC(0x7fff), levels); + negmask &= (negmask >> 1) | (1 << 7); + negmask = zap(-1, negmask); +#endif + + zeros = cmpbge(0, levels); + zeros &= zeros >> 1; + /* zeros |= zeros << 1 is not needed since qadd <= 255, so + zapping the lower byte suffices. */ + + levels *= qmul; + levels -= correction & (negmask << 16); + + /* Negate qadd for negative levels. */ + add = qadd ^ negmask; + add += WORD_VEC(0x0001) & negmask; + /* Set qadd to 0 for levels == 0. 
*/ + add = zap(add, zeros); + + levels += add; + + stq(levels, block); + } +} + +void MPV_common_init_axp(MpegEncContext *s) +{ + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; +} diff --git a/mpeg4/src/libavcodec/alpha/regdef.h b/mpeg4/src/libavcodec/alpha/regdef.h new file mode 100644 index 0000000000000000000000000000000000000000..7e7fc06b2c2840e8c3f70eddadfb63a3e54c1a29 --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/regdef.h @@ -0,0 +1,45 @@ +/* Some BSDs don't seem to have regdef.h... sigh */ +#ifndef alpha_regdef_h +#define alpha_regdef_h + +#define v0 $0 /* function return value */ + +#define t0 $1 /* temporary registers (caller-saved) */ +#define t1 $2 +#define t2 $3 +#define t3 $4 +#define t4 $5 +#define t5 $6 +#define t6 $7 +#define t7 $8 + +#define s0 $9 /* saved-registers (callee-saved registers) */ +#define s1 $10 +#define s2 $11 +#define s3 $12 +#define s4 $13 +#define s5 $14 +#define s6 $15 +#define fp s6 /* frame-pointer (s6 in frame-less procedures) */ + +#define a0 $16 /* argument registers (caller-saved) */ +#define a1 $17 +#define a2 $18 +#define a3 $19 +#define a4 $20 +#define a5 $21 + +#define t8 $22 /* more temps (caller-saved) */ +#define t9 $23 +#define t10 $24 +#define t11 $25 +#define ra $26 /* return address register */ +#define t12 $27 + +#define pv t12 /* procedure-variable register */ +#define AT $at /* assembler temporary */ +#define gp $29 /* global pointer */ +#define sp $30 /* stack pointer */ +#define zero $31 /* reads as zero, writes are noops */ + +#endif /* alpha_regdef_h */ diff --git a/mpeg4/src/libavcodec/alpha/simple_idct_alpha.c b/mpeg4/src/libavcodec/alpha/simple_idct_alpha.c new file mode 100644 index 0000000000000000000000000000000000000000..3a5db009be5b4eca1d45cc17bac1487c2aa93277 --- /dev/null +++ b/mpeg4/src/libavcodec/alpha/simple_idct_alpha.c @@ -0,0 +1,306 @@ +/* + * Simple IDCT (Alpha optimized) + * + * Copyright (c) 2001 Michael 
/*
 * One-dimensional inverse DCT over a row of eight coefficients, in place.
 *
 * The return value classifies the row as it was on entry:
 * 0: all entries 0, 1: only first entry nonzero, 2: otherwise
 * In case 0 the row is left untouched; in case 1 all eight entries are
 * overwritten with one constant; in case 2 the full transform is done.
 */
static inline int idct_row(DCTELEM *row)
{
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
    uint64_t l, r, t2;
    /* Low and high half of the row, one 64-bit load each. */
    l = ldq(row);
    r = ldq(row + 4);

    if (l == 0 && r == 0)
        return 0;

    /* First-entry term plus the rounding bias for the final shift
       (sextw presumably sign-extends the low 16-bit word, i.e. row[0]). */
    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));

    /* Nothing set above the lowest 16 bits: only row[0] is nonzero. */
    if (((l & ~0xffffUL) | r) == 0) {
        a0 >>= ROW_SHIFT;
        /* Replicate the 16-bit result into all four subwords. */
        t2 = (uint16_t) a0;
        t2 |= t2 << 16;
        t2 |= t2 << 32;

        stq(t2, row);
        stq(t2, row + 4);
        return 1;
    }

    a1 = a0;
    a2 = a0;
    a3 = a0;

    /* Even-indexed inputs accumulate into a0..a3; zero inputs are skipped. */
    t = extwl(l, 4);            /* row[2] */
    if (t != 0) {
        t = sextw(t);
        a0 += W2 * t;
        a1 += W6 * t;
        a2 -= W6 * t;
        a3 -= W2 * t;
    }

    t = extwl(r, 0);            /* row[4] */
    if (t != 0) {
        t = sextw(t);
        a0 += W4 * t;
        a1 -= W4 * t;
        a2 -= W4 * t;
        a3 += W4 * t;
    }

    t = extwl(r, 4);            /* row[6] */
    if (t != 0) {
        t = sextw(t);
        a0 += W6 * t;
        a1 -= W2 * t;
        a2 += W2 * t;
        a3 -= W6 * t;
    }

    /* Odd-indexed inputs accumulate into b0..b3; row[1] initialises them. */
    t = extwl(l, 2);            /* row[1] */
    if (t != 0) {
        t = sextw(t);
        b0 = W1 * t;
        b1 = W3 * t;
        b2 = W5 * t;
        b3 = W7 * t;
    } else {
        b0 = 0;
        b1 = 0;
        b2 = 0;
        b3 = 0;
    }

    t = extwl(l, 6);            /* row[3] */
    if (t) {
        t = sextw(t);
        b0 += W3 * t;
        b1 -= W7 * t;
        b2 -= W1 * t;
        b3 -= W5 * t;
    }


    t = extwl(r, 2);            /* row[5] */
    if (t) {
        t = sextw(t);
        b0 += W5 * t;
        b1 -= W1 * t;
        b2 += W7 * t;
        b3 += W3 * t;
    }

    t = extwl(r, 6);            /* row[7] */
    if (t) {
        t = sextw(t);
        b0 += W7 * t;
        b1 -= W5 * t;
        b2 += W3 * t;
        b3 -= W1 * t;
    }

    /* Outputs k and 7-k are the sum and difference of the same a/b pair. */
    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;

    return 2;
}
a3 -= W6 * col[8 * 6]; + } + + if (col[8 * 1]) { + b0 = W1 * col[8 * 1]; + b1 = W3 * col[8 * 1]; + b2 = W5 * col[8 * 1]; + b3 = W7 * col[8 * 1]; + } else { + b0 = 0; + b1 = 0; + b2 = 0; + b3 = 0; + } + + if (col[8 * 3]) { + b0 += W3 * col[8 * 3]; + b1 -= W7 * col[8 * 3]; + b2 -= W1 * col[8 * 3]; + b3 -= W5 * col[8 * 3]; + } + + if (col[8 * 5]) { + b0 += W5 * col[8 * 5]; + b1 -= W1 * col[8 * 5]; + b2 += W7 * col[8 * 5]; + b3 += W3 * col[8 * 5]; + } + + if (col[8 * 7]) { + b0 += W7 * col[8 * 7]; + b1 -= W5 * col[8 * 7]; + b2 += W3 * col[8 * 7]; + b3 -= W1 * col[8 * 7]; + } + + col[8 * 0] = (a0 + b0) >> COL_SHIFT; + col[8 * 7] = (a0 - b0) >> COL_SHIFT; + col[8 * 1] = (a1 + b1) >> COL_SHIFT; + col[8 * 6] = (a1 - b1) >> COL_SHIFT; + col[8 * 2] = (a2 + b2) >> COL_SHIFT; + col[8 * 5] = (a2 - b2) >> COL_SHIFT; + col[8 * 3] = (a3 + b3) >> COL_SHIFT; + col[8 * 4] = (a3 - b3) >> COL_SHIFT; +} + +/* If all rows but the first one are zero after row transformation, + all rows will be identical after column transformation. 
*/ +static inline void idct_col2(DCTELEM *col) +{ + int i; + uint64_t l, r; + + for (i = 0; i < 8; ++i) { + int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4; + + a0 *= W4; + col[i] = a0 >> COL_SHIFT; + } + + l = ldq(col + 0 * 4); r = ldq(col + 1 * 4); + stq(l, col + 2 * 4); stq(r, col + 3 * 4); + stq(l, col + 4 * 4); stq(r, col + 5 * 4); + stq(l, col + 6 * 4); stq(r, col + 7 * 4); + stq(l, col + 8 * 4); stq(r, col + 9 * 4); + stq(l, col + 10 * 4); stq(r, col + 11 * 4); + stq(l, col + 12 * 4); stq(r, col + 13 * 4); + stq(l, col + 14 * 4); stq(r, col + 15 * 4); +} + +void simple_idct_axp(DCTELEM *block) +{ + + int i; + int rowsZero = 1; /* all rows except row 0 zero */ + int rowsConstant = 1; /* all rows consist of a constant value */ + + for (i = 0; i < 8; i++) { + int sparseness = idct_row(block + 8 * i); + + if (i > 0 && sparseness > 0) + rowsZero = 0; + if (sparseness == 2) + rowsConstant = 0; + } + + if (rowsZero) { + idct_col2(block); + } else if (rowsConstant) { + idct_col(block); + for (i = 0; i < 8; i += 2) { + uint64_t v = (uint16_t) block[0]; + uint64_t w = (uint16_t) block[8]; + + v |= v << 16; + w |= w << 16; + v |= v << 32; + w |= w << 32; + stq(v, block + 0 * 4); + stq(v, block + 1 * 4); + stq(w, block + 2 * 4); + stq(w, block + 3 * 4); + block += 4 * 4; + } + } else { + for (i = 0; i < 8; i++) + idct_col(block + i); + } +} + +void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_axp(block); + put_pixels_clamped_axp_p(block, dest, line_size); +} + +void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_axp(block); + add_pixels_clamped_axp_p(block, dest, line_size); +} diff --git a/mpeg4/src/libavcodec/amr.c b/mpeg4/src/libavcodec/amr.c new file mode 100644 index 0000000000000000000000000000000000000000..6a354130c3becad1f7ff9ad646e81b2086c96f0b --- /dev/null +++ b/mpeg4/src/libavcodec/amr.c @@ -0,0 +1,664 @@ +/* + * AMR Audio decoder stub + * Copyright (c) 2003 the ffmpeg project + 
* + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + /* + This code implements amr-nb and amr-wb audio encoder/decoder through external reference + code from www.3gpp.org. The licence of the code from 3gpp is unclear so you + have to download the code separately. Two versions exist: one fixed-point + and one with floats. For some reason the float encoder is significantly faster, + at least on a P4 1.5GHz (0.9s instead of 9.9s on a 30s audio clip at MR102). + Both float and fixed point are supported for amr-nb, but only float for + amr-wb. 
+ + --AMR-NB-- + The fixed-point (TS26.073) can be downloaded from: + http://www.3gpp.org/ftp/Specs/archive/26_series/26.073/26073-510.zip + Extract the soure into ffmpeg/libavcodec/amr + To use the fixed version run "./configure" with "--enable-amr_nb-fixed" + + The float version (default) can be downloaded from: + http://www.3gpp.org/ftp/Specs/archive/26_series/26.104/26104-510.zip + Extract the soure into ffmpeg/libavcodec/amr_float + + The specification for amr-nb can be found in TS 26.071 + (http://www.3gpp.org/ftp/Specs/html-info/26071.htm) and some other + info at http://www.3gpp.org/ftp/Specs/html-info/26-series.htm + + --AMR-WB-- + The reference code can be downloaded from: + http://www.3gpp.org/ftp/Specs/archive/26_series/26.204/26204-510.zip + It should be extracted to "libavcodec/amrwb_float". Enable it with + "--enable-amr_wb". + + The specification for amr-wb can be downloaded from: + http://www.3gpp.org/ftp/Specs/archive/26_series/26.171/26171-500.zip + + If someone want to use the fixed point version it can be downloaded + from: http://www.3gpp.org/ftp/Specs/archive/26_series/26.173/26173-571.zip + + */ + +#include "avcodec.h" + +#ifdef AMR_NB_FIXED + +#define MMS_IO + +#include "amr/sp_dec.h" +#include "amr/d_homing.h" +#include "amr/typedef.h" +#include "amr/sp_enc.h" +#include "amr/sid_sync.h" +#include "amr/e_homing.h" + +#else +#include "amr_float/interf_dec.h" +#include "amr_float/interf_enc.h" +#endif + +/* Common code for fixed and float version*/ +typedef struct AMR_bitrates +{ + int startrate; + int stoprate; + enum Mode mode; + +} AMR_bitrates; + +/* Match desired bitrate with closest one*/ +static enum Mode getBitrateMode(int bitrate) +{ + /* Adjusted so that all bitrates can be used from commandline where + only a multiple of 1000 can be specified*/ + AMR_bitrates rates[]={ {0,4999,MR475}, //4 + {5000,5899,MR515},//5 + {5900,6699,MR59},//6 + {6700,7000,MR67},//7 + {7001,7949,MR74},//8 + {7950,9999,MR795},//9 + {10000,11999,MR102},//10 + 
{12000,64000,MR122},//12 + + }; + int i; + for(i=0;i<8;i++) + { + if(rates[i].startrate<=bitrate && rates[i].stoprate>=bitrate) + { + return(rates[i].mode); + } + } + /*Return highest possible*/ + return(MR122); +} + +#ifdef AMR_NB_FIXED +/* fixed point version*/ +/* frame size in serial bitstream file (frame type + serial stream + flags) */ +#define SERIAL_FRAMESIZE (1+MAX_SERIAL_SIZE+5) + +typedef struct AMRContext { + int frameCount; + Speech_Decode_FrameState *speech_decoder_state; + enum RXFrameType rx_type; + enum Mode mode; + Word16 reset_flag; + Word16 reset_flag_old; + + enum Mode enc_bitrate; + Speech_Encode_FrameState *enstate; + sid_syncState *sidstate; + enum TXFrameType tx_frametype; + + +} AMRContext; + +static int amr_nb_decode_init(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + s->frameCount=0; + s->speech_decoder_state=NULL; + s->rx_type = (enum RXFrameType)0; + s->mode= (enum Mode)0; + s->reset_flag=0; + s->reset_flag_old=1; + + if(Speech_Decode_Frame_init(&s->speech_decoder_state, "Decoder")) + { + av_log(avctx, AV_LOG_ERROR, "Speech_Decode_Frame_init error\n"); + return -1; + } + return 0; +} + +static int amr_nb_encode_init(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + s->frameCount=0; + s->speech_decoder_state=NULL; + s->rx_type = (enum RXFrameType)0; + s->mode= (enum Mode)0; + s->reset_flag=0; + s->reset_flag_old=1; + + if(avctx->sample_rate!=8000) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Only 8000Hz sample rate supported\n"); + } + return -1; + } + + if(avctx->channels!=1) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n"); + } + return -1; + } + + avctx->frame_size=160; + avctx->coded_frame= avcodec_alloc_frame(); + + if(Speech_Encode_Frame_init(&s->enstate, 0, "encoder") || sid_sync_init (&s->sidstate)) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Speech_Encode_Frame_init error\n"); + } + return -1; + } + + 
s->enc_bitrate=getBitrateMode(avctx->bit_rate); + + return 0; +} + +static int amr_nb_encode_close(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + Speech_Encode_Frame_exit(&s->enstate); + sid_sync_exit (&s->sidstate); + av_freep(&avctx->coded_frame); + return 0; +} + +static int amr_nb_decode_close(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + Speech_Decode_Frame_exit(&s->speech_decoder_state); + return 0; +} + +static int amr_nb_decode_frame(AVCodecContext * avctx, + void *data, int *data_size, + uint8_t * buf, int buf_size) +{ + AMRContext *s = avctx->priv_data; + + uint8_t*amrData=buf; + int offset=0; + + UWord8 toc, q, ft; + + Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */ + Word16 *synth; + UWord8 *packed_bits; + + static Word16 packed_size[16] = {12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0}; + int i; + + //printf("amr_decode_frame data_size=%i buf=0x%X buf_size=%d frameCount=%d!!\n",*data_size,buf,buf_size,s->frameCount); + + synth=data; + +// while(offset> 2) & 0x01; + ft = (toc >> 3) & 0x0F; + + //printf("offset=%d, packet_size=%d amrData= 0x%X %X %X %X\n",offset,packed_size[ft],amrData[offset],amrData[offset+1],amrData[offset+2],amrData[offset+3]); + + offset++; + + packed_bits=amrData+offset; + + offset+=packed_size[ft]; + + //Unsort and unpack bits + s->rx_type = UnpackBits(q, ft, packed_bits, &s->mode, &serial[1]); + + //We have a new frame + s->frameCount++; + + if (s->rx_type == RX_NO_DATA) + { + s->mode = s->speech_decoder_state->prev_mode; + } + else { + s->speech_decoder_state->prev_mode = s->mode; + } + + /* if homed: check if this frame is another homing frame */ + if (s->reset_flag_old == 1) + { + /* only check until end of first subframe */ + s->reset_flag = decoder_homing_frame_test_first(&serial[1], s->mode); + } + /* produce encoder homing frame if homed & input=decoder homing frame */ + if ((s->reset_flag != 0) && (s->reset_flag_old != 0)) + { + for (i = 0; i < L_FRAME; i++) + { + synth[i] = 
EHF_MASK; + } + } + else + { + /* decode frame */ + Speech_Decode_Frame(s->speech_decoder_state, s->mode, &serial[1], s->rx_type, synth); + } + + //Each AMR-frame results in 160 16-bit samples + *data_size+=160*2; + synth+=160; + + /* if not homed: check whether current frame is a homing frame */ + if (s->reset_flag_old == 0) + { + /* check whole frame */ + s->reset_flag = decoder_homing_frame_test(&serial[1], s->mode); + } + /* reset decoder if current frame is a homing frame */ + if (s->reset_flag != 0) + { + Speech_Decode_Frame_reset(s->speech_decoder_state); + } + s->reset_flag_old = s->reset_flag; + + } + return offset; +} + + +static int amr_nb_encode_frame(AVCodecContext *avctx, + unsigned char *frame/*out*/, int buf_size, void *data/*in*/) +{ + short serial_data[250] = {0}; + + AMRContext *s = avctx->priv_data; + int written; + + s->reset_flag = encoder_homing_frame_test(data); + + Speech_Encode_Frame(s->enstate, s->enc_bitrate, data, &serial_data[1], &s->mode); + + /* add frame type and mode */ + sid_sync (s->sidstate, s->mode, &s->tx_frametype); + + written = PackBits(s->mode, s->enc_bitrate, s->tx_frametype, &serial_data[1], frame); + + if (s->reset_flag != 0) + { + Speech_Encode_Frame_reset(s->enstate); + sid_sync_reset(s->sidstate); + } + return written; +} + + +#elif defined(AMR_NB) /* Float point version*/ + +typedef struct AMRContext { + int frameCount; + void * decState; + int *enstate; + enum Mode enc_bitrate; +} AMRContext; + +static int amr_nb_decode_init(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + s->frameCount=0; + s->decState=Decoder_Interface_init(); + if(!s->decState) + { + av_log(avctx, AV_LOG_ERROR, "Decoder_Interface_init error\r\n"); + return -1; + } + return 0; +} + +static int amr_nb_encode_init(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + s->frameCount=0; + + if(avctx->sample_rate!=8000) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Only 8000Hz sample rate supported\n"); + } + 
return -1; + } + + if(avctx->channels!=1) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n"); + } + return -1; + } + + avctx->frame_size=160; + avctx->coded_frame= avcodec_alloc_frame(); + + s->enstate=Encoder_Interface_init(0); + if(!s->enstate) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Encoder_Interface_init error\n"); + } + return -1; + } + + s->enc_bitrate=getBitrateMode(avctx->bit_rate); + + return 0; +} + +static int amr_nb_decode_close(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + Decoder_Interface_exit(s->decState); + return 0; +} + +static int amr_nb_encode_close(AVCodecContext * avctx) +{ + AMRContext *s = avctx->priv_data; + Encoder_Interface_exit(s->enstate); + av_freep(&avctx->coded_frame); + return 0; +} + +static int amr_nb_decode_frame(AVCodecContext * avctx, + void *data, int *data_size, + uint8_t * buf, int buf_size) +{ + AMRContext *s = (AMRContext*)avctx->priv_data; + + uint8_t*amrData=buf; + static short block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 }; + enum Mode dec_mode; + int packet_size; + + /* av_log(NULL,AV_LOG_DEBUG,"amr_decode_frame buf=%p buf_size=%d frameCount=%d!!\n",buf,buf_size,s->frameCount); */ + + if(buf_size==0) { + /* nothing to do */ + return 0; + } + + dec_mode = (buf[0] >> 3) & 0x000F; + packet_size = block_size[dec_mode]+1; + + if(packet_size > buf_size) { + av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size); + return -1; + } + + s->frameCount++; + /* av_log(NULL,AV_LOG_DEBUG,"packet_size=%d amrData= 0x%X %X %X %X\n",packet_size,amrData[0],amrData[1],amrData[2],amrData[3]); */ + /* call decoder */ + Decoder_Interface_Decode(s->decState, amrData, data, 0); + *data_size=160*2; + + return packet_size; +} + +static int amr_nb_encode_frame(AVCodecContext *avctx, + unsigned char *frame/*out*/, int buf_size, void *data/*in*/) +{ + AMRContext *s = (AMRContext*)avctx->priv_data; + int written; + + 
/* Common code for fixed and float version*/
/* One row of the bit-rate lookup: an inclusive range and the mode it selects. */
typedef struct AMRWB_bitrates
{
    int startrate;  /* lowest bit rate that selects this mode (inclusive) */
    int stoprate;   /* highest bit rate that selects this mode (inclusive) */
    int mode;

} AMRWB_bitrates;

/*
 * Map a requested bit rate to an AMR-WB mode index (0..8).
 *
 * bitrate - requested bit rate; compared against the inclusive ranges below.
 * Returns the mode of the first range containing bitrate, or 8 (the highest
 * mode) when no range matches -- that is, above 24000 and also for negative
 * values.
 */
static int getWBBitrateMode(int bitrate)
{
    /* Adjusted so that all bitrates can be used from commandline where
       only a multiple of 1000 can be specified*/
    /* The table never changes, so build it once rather than on every call.
       The trailing annotations are the nominal rates in kbit/s. */
    static const AMRWB_bitrates rates[] = {
        {     0,  7999, 0 }, /* 6.60 */
        {  8000,  9999, 1 }, /* 8.85 */
        { 10000, 13000, 2 }, /* 12.65 */
        { 13001, 14999, 3 }, /* 14.25 */
        { 15000, 17000, 4 }, /* 15.85 */
        { 17001, 18000, 5 }, /* 18.25 */
        { 18001, 22000, 6 }, /* 19.85 */
        { 22001, 23000, 7 }, /* 23.05 */
        { 23001, 24000, 8 }, /* 23.85 */
    };
    unsigned int i;

    /* The bound follows the table so the two cannot drift apart. */
    for (i = 0; i < sizeof(rates) / sizeof(rates[0]); i++)
    {
        if (rates[i].startrate <= bitrate && rates[i].stoprate >= bitrate)
        {
            return rates[i].mode;
        }
    }
    /*Return highest possible*/
    return 8;
}
AV_LOG_DEBUG, "Only 16000Hz sample rate supported\n"); + } + return -1; + } + + if(avctx->channels!=1) + { + if(avctx->debug) + { + av_log(avctx, AV_LOG_DEBUG, "Only mono supported\n"); + } + return -1; + } + + avctx->frame_size=320; + avctx->coded_frame= avcodec_alloc_frame(); + + s->state = E_IF_init(); + s->mode=getWBBitrateMode(avctx->bit_rate); + s->allow_dtx=0; + + return 0; +} + +static int amr_wb_encode_close(AVCodecContext * avctx) +{ + AMRWBContext *s = (AMRWBContext*) avctx->priv_data; + E_IF_exit(s->state); + av_freep(&avctx->coded_frame); + s->frameCount++; + return 0; +} + +static int amr_wb_encode_frame(AVCodecContext *avctx, + unsigned char *frame/*out*/, int buf_size, void *data/*in*/) +{ + AMRWBContext *s = (AMRWBContext*) avctx->priv_data; + int size = E_IF_encode(s->state, s->mode, data, frame, s->allow_dtx); + return size; +} + +static int amr_wb_decode_init(AVCodecContext * avctx) +{ + AMRWBContext *s = (AMRWBContext *)avctx->priv_data; + s->frameCount=0; + s->state = D_IF_init(); + return 0; +} + +extern const UWord8 block_size[]; + +static int amr_wb_decode_frame(AVCodecContext * avctx, + void *data, int *data_size, + uint8_t * buf, int buf_size) +{ + AMRWBContext *s = (AMRWBContext*)avctx->priv_data; + + uint8_t*amrData=buf; + int mode; + int packet_size; + + if(buf_size==0) { + /* nothing to do */ + return 0; + } + + mode = (amrData[0] >> 3) & 0x000F; + packet_size = block_size[mode]; + + if(packet_size > buf_size) { + av_log(avctx, AV_LOG_ERROR, "amr frame too short (%u, should be %u)\n", buf_size, packet_size+1); + return -1; + } + + s->frameCount++; + D_IF_decode( s->state, amrData, data, _good_frame); + *data_size=320*2; + return packet_size; +} + +static int amr_wb_decode_close(AVCodecContext * avctx) +{ + AMRWBContext *s = (AMRWBContext *)avctx->priv_data; + D_IF_exit(s->state); + return 0; +} + +AVCodec amr_wb_decoder = +{ + "amr_wb", + CODEC_TYPE_AUDIO, + CODEC_ID_AMR_WB, + sizeof(AMRWBContext), + amr_wb_decode_init, + NULL, + 
amr_wb_decode_close, + amr_wb_decode_frame, +}; + +AVCodec amr_wb_encoder = +{ + "amr_wb", + CODEC_TYPE_AUDIO, + CODEC_ID_AMR_WB, + sizeof(AMRWBContext), + amr_wb_encode_init, + amr_wb_encode_frame, + amr_wb_encode_close, + NULL, +}; + +#endif //AMR_WB diff --git a/mpeg4/src/libavcodec/apiexample.c b/mpeg4/src/libavcodec/apiexample.c new file mode 100644 index 0000000000000000000000000000000000000000..7fe1c119d40b541bc264ed9f228313d736fa1c27 --- /dev/null +++ b/mpeg4/src/libavcodec/apiexample.c @@ -0,0 +1,437 @@ +/** + * @file apiexample.c + * avcodec API use example. + * + * Note that this library only handles codecs (mpeg, mpeg4, etc...), + * not file formats (avi, vob, etc...). See library 'libavformat' for the + * format handling + */ + +#include +#include +#include +#include + +#ifdef HAVE_AV_CONFIG_H +#undef HAVE_AV_CONFIG_H +#endif + +#include "avcodec.h" + +#define INBUF_SIZE 4096 + +/* + * Audio encoding example + */ +void audio_encode_example(const char *filename) +{ + AVCodec *codec; + AVCodecContext *c= NULL; + int frame_size, i, j, out_size, outbuf_size; + FILE *f; + short *samples; + float t, tincr; + uint8_t *outbuf; + + printf("Audio encoding\n"); + + /* find the MP2 encoder */ + codec = avcodec_find_encoder(CODEC_ID_MP2); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + c= avcodec_alloc_context(); + + /* put sample parameters */ + c->bit_rate = 64000; + c->sample_rate = 44100; + c->channels = 2; + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + frame_size = c->frame_size; + samples = malloc(frame_size * 2 * c->channels); + outbuf_size = 10000; + outbuf = malloc(outbuf_size); + + f = fopen(filename, "wb"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + /* encode a single tone sound */ + t = 0; + tincr = 2 * M_PI * 440.0 / c->sample_rate; + for(i=0;i<200;i++) { + 
for(j=0;j 0) { + len = avcodec_decode_audio(c, (short *)outbuf, &out_size, + inbuf_ptr, size); + if (len < 0) { + fprintf(stderr, "Error while decoding\n"); + exit(1); + } + if (out_size > 0) { + /* if a frame has been decoded, output it */ + fwrite(outbuf, 1, out_size, outfile); + } + size -= len; + inbuf_ptr += len; + } + } + + fclose(outfile); + fclose(f); + free(outbuf); + + avcodec_close(c); + av_free(c); +} + +/* + * Video encoding example + */ +void video_encode_example(const char *filename) +{ + AVCodec *codec; + AVCodecContext *c= NULL; + int i, out_size, size, x, y, outbuf_size; + FILE *f; + AVFrame *picture; + uint8_t *outbuf, *picture_buf; + + printf("Video encoding\n"); + + /* find the mpeg1 video encoder */ + codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + c= avcodec_alloc_context(); + picture= avcodec_alloc_frame(); + + /* put sample parameters */ + c->bit_rate = 400000; + /* resolution must be a multiple of two */ + c->width = 352; + c->height = 288; + /* frames per second */ + c->time_base= (AVRational){1,25}; + c->gop_size = 10; /* emit one intra frame every ten frames */ + c->max_b_frames=1; + c->pix_fmt = PIX_FMT_YUV420P; + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + + f = fopen(filename, "wb"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + /* alloc image and output buffer */ + outbuf_size = 100000; + outbuf = malloc(outbuf_size); + size = c->width * c->height; + picture_buf = malloc((size * 3) / 2); /* size for YUV 420 */ + + picture->data[0] = picture_buf; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size / 4; + picture->linesize[0] = c->width; + picture->linesize[1] = c->width / 2; + picture->linesize[2] = c->width / 2; + + /* encode 1 second of video */ + 
for(i=0;i<25;i++) { + fflush(stdout); + /* prepare a dummy image */ + /* Y */ + for(y=0;yheight;y++) { + for(x=0;xwidth;x++) { + picture->data[0][y * picture->linesize[0] + x] = x + y + i * 3; + } + } + + /* Cb and Cr */ + for(y=0;yheight/2;y++) { + for(x=0;xwidth/2;x++) { + picture->data[1][y * picture->linesize[1] + x] = 128 + y + i * 2; + picture->data[2][y * picture->linesize[2] + x] = 64 + x + i * 5; + } + } + + /* encode the image */ + out_size = avcodec_encode_video(c, outbuf, outbuf_size, picture); + printf("encoding frame %3d (size=%5d)\n", i, out_size); + fwrite(outbuf, 1, out_size, f); + } + + /* get the delayed frames */ + for(; out_size; i++) { + fflush(stdout); + + out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL); + printf("write frame %3d (size=%5d)\n", i, out_size); + fwrite(outbuf, 1, out_size, f); + } + + /* add sequence end code to have a real mpeg file */ + outbuf[0] = 0x00; + outbuf[1] = 0x00; + outbuf[2] = 0x01; + outbuf[3] = 0xb7; + fwrite(outbuf, 1, 4, f); + fclose(f); + free(picture_buf); + free(outbuf); + + avcodec_close(c); + av_free(c); + av_free(picture); + printf("\n"); +} + +/* + * Video decoding example + */ + +void pgm_save(unsigned char *buf,int wrap, int xsize,int ysize,char *filename) +{ + FILE *f; + int i; + + f=fopen(filename,"w"); + fprintf(f,"P5\n%d %d\n%d\n",xsize,ysize,255); + for(i=0;icapabilities&CODEC_CAP_TRUNCATED) + c->flags|= CODEC_FLAG_TRUNCATED; /* we dont send complete frames */ + + /* for some codecs, such as msmpeg4 and mpeg4, width and height + MUST be initialized there because these info are not available + in the bitstream */ + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + + f = fopen(filename, "rb"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + frame = 0; + for(;;) { + size = fread(inbuf, 1, INBUF_SIZE, f); + if (size == 0) + break; + + 
/* NOTE1: some codecs are stream based (mpegvideo, mpegaudio) + and this is the only method to use them because you cannot + know the compressed data size before analysing it. + + BUT some other codecs (msmpeg4, mpeg4) are inherently frame + based, so you must call them with all the data for one + frame exactly. You must also initialize 'width' and + 'height' before initializing them. */ + + /* NOTE2: some codecs allow the raw parameters (frame size, + sample rate) to be changed at any frame. We handle this, so + you should also take care of it */ + + /* here, we use a stream based decoder (mpeg1video), so we + feed decoder and see if it could decode a frame */ + inbuf_ptr = inbuf; + while (size > 0) { + len = avcodec_decode_video(c, picture, &got_picture, + inbuf_ptr, size); + if (len < 0) { + fprintf(stderr, "Error while decoding frame %d\n", frame); + exit(1); + } + if (got_picture) { + printf("saving frame %3d\n", frame); + fflush(stdout); + + /* the picture is allocated by the decoder. no need to + free it */ + snprintf(buf, sizeof(buf), outfilename, frame); + pgm_save(picture->data[0], picture->linesize[0], + c->width, c->height, buf); + frame++; + } + size -= len; + inbuf_ptr += len; + } + } + + /* some codecs, such as MPEG, transmit the I and P frame with a + latency of one frame. You must do the following to have a + chance to get the last frame of the video */ + len = avcodec_decode_video(c, picture, &got_picture, + NULL, 0); + if (got_picture) { + printf("saving last frame %3d\n", frame); + fflush(stdout); + + /* the picture is allocated by the decoder. 
/*
 * Entry point of the API example.
 *
 * With no argument the audio and video encode examples are run first and
 * the freshly written /tmp/test.mpg is decoded; with an argument that file
 * is decoded instead.  Always returns 0.
 */
int main(int argc, char **argv)
{
    /* Default input is the file the encode example writes below. */
    const char *filename = "/tmp/test.mpg";

    /* must be called before using avcodec lib */
    avcodec_init();

    /* register all the codecs (you can also register only the codecs
       you need, to get smaller code) */
    avcodec_register_all();

    if (argc > 1) {
        filename = argv[1];
    } else {
        audio_encode_example("/tmp/test.mp2");
        audio_decode_example("/tmp/test.sw", "/tmp/test.mp2");

        video_encode_example("/tmp/test.mpg");
    }

    video_decode_example("/tmp/test%d.pgm", filename);

    return 0;
}
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" +#ifdef HAVE_IPP +#include "ipp.h" +#endif + +extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); + +extern void j_rev_dct_ARM(DCTELEM *data); +extern void simple_idct_ARM(DCTELEM *data); + +/* XXX: local hack */ +static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); +static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); + +void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); +void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); + +CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8) +CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8) +CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8) +CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) + +static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size) +{ + asm volatile ( + "mov r10, #8 \n\t" + + "1: \n\t" + + /* load dest */ + "ldr r4, [%1] \n\t" + 
/* block[0] and block[1]*/ + "ldrsh r5, [%0] \n\t" + "ldrsh r7, [%0, #2] \n\t" + "and r6, r4, #0xFF \n\t" + "and r8, r4, #0xFF00 \n\t" + "add r6, r5, r6 \n\t" + "add r8, r7, r8, lsr #8 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" /* bit 8 set is taken to mean the sum left 0..255: use the top byte of ~coeff (0 for a negative coeff, 0xFF otherwise) */ + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "mov r9, r6 \n\t" + "ldrsh r5, [%0, #4] \n\t" /* moved from [A] */ + "orr r9, r9, r8, lsl #8 \n\t" + /* block[2] and block[3] */ + /* [A] */ + "ldrsh r7, [%0, #6] \n\t" + "and r6, r4, #0xFF0000 \n\t" + "and r8, r4, #0xFF000000 \n\t" + "add r6, r5, r6, lsr #16 \n\t" + "add r8, r7, r8, lsr #24 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "orr r9, r9, r6, lsl #16 \n\t" + "ldr r4, [%1, #4] \n\t" /* moved from [B] */ + "orr r9, r9, r8, lsl #24 \n\t" + /* store dest */ + "ldrsh r5, [%0, #8] \n\t" /* moved from [C] */ + "str r9, [%1] \n\t" + + /* load dest */ + /* [B] */ + /* block[4] and block[5] */ + /* [C] */ + "ldrsh r7, [%0, #10] \n\t" + "and r6, r4, #0xFF \n\t" + "and r8, r4, #0xFF00 \n\t" + "add r6, r5, r6 \n\t" + "add r8, r7, r8, lsr #8 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "mov r9, r6 \n\t" + "ldrsh r5, [%0, #12] \n\t" /* moved from [D] */ + "orr r9, r9, r8, lsl #8 \n\t" + /* block[6] and block[7] */ + /* [D] */ + "ldrsh r7, [%0, #14] \n\t" + "and r6, r4, #0xFF0000 \n\t" + "and r8, r4, #0xFF000000 \n\t" + "add r6, r5, r6, lsr #16 \n\t" + "add r8, r7, r8, lsr #24 \n\t" + "mvn r5, r5 \n\t" + "mvn r7, r7 \n\t" + "tst r6, #0x100 \n\t" + "movne r6, r5, lsr #24 \n\t" + "tst r8, #0x100 \n\t" + "movne r8, r7, lsr #24 \n\t" + "orr r9, r9, r6, lsl #16 \n\t" + "add %0, %0, #16 \n\t" /* moved from [E] */ + "orr r9, r9, r8, lsl #24 \n\t" + "subs r10, r10, #1 \n\t" /* moved from [F] */ + /* store dest */ + "str
r9, [%1, #4] \n\t" + + /* [E] */ + /* [F] */ + "add %1, %1, %2 \n\t" + "bne 1b \n\t" + : "+r"(block), + "+r"(dest) + : "r"(line_size) + : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); +} + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct_ARM (block); + ff_put_pixels_clamped(block, dest, line_size); +} +static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct_ARM (block); + ff_add_pixels_clamped(block, dest, line_size); +} +static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_ARM (block); + ff_put_pixels_clamped(block, dest, line_size); +} +static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_ARM (block); + ff_add_pixels_clamped(block, dest, line_size); +} +static void simple_idct_ipp(DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s_C1I(block); +#endif +} +static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size); +#endif +} + +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size); + +static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) +{ +#ifdef HAVE_IPP + ippiDCT8x8Inv_Video_16s_C1I(block); +#ifdef HAVE_IWMMXT + add_pixels_clamped_iwmmxt(block, dest, line_size); +#else + add_pixels_clamped_ARM(block, dest, line_size); +#endif +#endif +} + +void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) +{ + const int idct_algo= avctx->idct_algo; + + ff_put_pixels_clamped = c->put_pixels_clamped; + ff_add_pixels_clamped = c->add_pixels_clamped; + +#ifdef HAVE_IPP + if(idct_algo==FF_IDCT_ARM){ +#else + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){ +#endif + c->idct_put= j_rev_dct_ARM_put; + c->idct_add= j_rev_dct_ARM_add; + c->idct = 
j_rev_dct_ARM; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ + } else if (idct_algo==FF_IDCT_SIMPLEARM){ + c->idct_put= simple_idct_ARM_put; + c->idct_add= simple_idct_ARM_add; + c->idct = simple_idct_ARM; + c->idct_permutation_type= FF_NO_IDCT_PERM; +#ifdef HAVE_IPP + } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){ +#else + } else if (idct_algo==FF_IDCT_IPP){ +#endif + c->idct_put= simple_idct_ipp_put; + c->idct_add= simple_idct_ipp_add; + c->idct = simple_idct_ipp; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } + +/* c->put_pixels_tab[0][0] = put_pixels16_arm; */ // NG! + c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK! + c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK! +/* c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; /\* NG *\/ */ +/* c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; // ? (presumably "not used"; original note was mis-encoded) */ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK +/* c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; //NG */ + c->put_pixels_tab[1][0] = put_pixels8_arm; //OK + c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK +/* c->put_pixels_tab[1][2] = put_pixels8_y2_arm; //NG */ +/* c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; //NG */ + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK +/* c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;//NG */ + +#ifdef HAVE_IWMMXT + dsputil_init_iwmmxt(c, avctx); +#endif +} diff --git a/mpeg4/src/libavcodec/armv4l/dsputil_arm_s.S b/mpeg4/src/libavcodec/armv4l/dsputil_arm_s.S new file mode 100644 index 0000000000000000000000000000000000000000..8d64a1af12bc468a6cd6bfaf2080af9e32a642b1 --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/dsputil_arm_s.S @@ -0,0 +1,694 @@ +@ +@ ARMv4L optimized DSP utils +@ Copyright (c) 2004 AGAWA Koji +@ +@ This
library is free software; you can redistribute it and/or +@ modify it under the terms of the GNU Lesser General Public +@ License as published by the Free Software Foundation; either +@ version 2 of the License, or (at your option) any later version. +@ +@ This library is distributed in the hope that it will be useful, +@ but WITHOUT ANY WARRANTY; without even the implied warranty of +@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +@ Lesser General Public License for more details. +@ +@ You should have received a copy of the GNU Lesser General Public +@ License along with this library; if not, write to the Free Software +@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +@ + +.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4 + mov \Rd0, \Rn0, lsr #(\shift * 8) + mov \Rd1, \Rn1, lsr #(\shift * 8) + mov \Rd2, \Rn2, lsr #(\shift * 8) + mov \Rd3, \Rn3, lsr #(\shift * 8) + orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) + orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8) + orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8) + orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2 + mov \R0, \R0, lsr #(\shift * 8) + orr \R0, \R0, \R1, lsl #(32 - \shift * 8) + mov \R1, \R1, lsr #(\shift * 8) + orr \R1, \R1, \R2, lsl #(32 - \shift * 8) +.endm +.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2 + mov \Rdst0, \Rsrc0, lsr #(\shift * 8) + mov \Rdst1, \Rsrc1, lsr #(\shift * 8) + orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) + orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) +.endm + +.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + orr \Rn0, \Rn0, \Rm0 + orr \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + sub \Rd0, \Rn0, \Rd0, lsr #1 + sub \Rd1, \Rn1, \Rd1, lsr #1 +.endm + 
+.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask + @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1) + @ Rmask = 0xFEFEFEFE + @ Rn = destroy + eor \Rd0, \Rn0, \Rm0 + eor \Rd1, \Rn1, \Rm1 + and \Rn0, \Rn0, \Rm0 + and \Rn1, \Rn1, \Rm1 + and \Rd0, \Rd0, \Rmask + and \Rd1, \Rd1, \Rmask + add \Rd0, \Rn0, \Rd0, lsr #1 + add \Rd1, \Rn1, \Rd1, lsr #1 +.endm + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels16_arm +put_pixels16_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11, lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne pc, [r5] +1: + ldmia r1, {r4-r7} + add r1, r1, r2 + stmia r0, {r4-r7} + pld [r1] + subs r3, r3, #1 + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r11, pc} + .align 8 +2: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r11, pc} + .align 8 +3: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r11, pc} + .align 8 +4: + ldmia r1, {r4-r8} + add r1, r1, r2 + ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 + pld [r1] + subs r3, r3, #1 + stmia r0, {r9-r12} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r11,pc} + .align 8 +5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_arm +put_pixels8_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r5,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + bic r1, r1, #3 + add r5, r5, r4, lsl #2 + ldrne 
pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 + subs r3, r3, #1 + pld [r1] + stmia r0, {r4-r5} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r5,pc} + .align 8 +2: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r5,pc} + .align 8 +3: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r5,pc} + .align 8 +4: + ldmia r1, {r4-r5, r12} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 + pld [r1] + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r5,pc} + .align 8 +5: + .word 1b + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_x2_arm +put_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 
3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_x2_arm +put_no_rnd_pixels8_x2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r10,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 1b + ldmfd sp!, {r4-r10,pc} + .align 8 +2: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 2b + ldmfd sp!, {r4-r10,pc} + .align 8 +3: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 3b + ldmfd sp!, {r4-r10,pc} + .align 8 +4: + ldmia r1, {r4-r5, r10} + add r1, r1, r2 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 4b + ldmfd sp!, {r4-r10,pc} @@ update PC with LR content. 
+ .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + +@ ---------------------------------------------------------------- + .align 8 + .global put_pixels8_y2_arm +put_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, 
r7, r8, r9 + RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + + .align 8 + .global put_no_rnd_pixels8_y2_arm +put_no_rnd_pixels8_y2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adr r5, 5f + ands r4, r1, #3 + mov r3, r3, lsr #1 + ldr r12, [r5] + add r5, r5, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + ldmia r1, {r4-r5} + add r1, r1, r2 +6: ldmia r1, {r6-r7} + add r1, r1, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 + ldmia r1, {r4-r5} + add r1, r1, r2 + stmia r0, {r8-r9} + add r0, r0, r2 + pld [r1] + NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 + subs r3, r3, #1 + stmia r0, {r8-r9} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +2: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +3: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, 
r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +4: + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 +6: ldmia r1, {r7-r9} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9 + NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + ldmia r1, {r4-r6} + add r1, r1, r2 + pld [r1] + ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6 + subs r3, r3, #1 + NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 + stmia r0, {r10-r11} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} + .align 8 +5: + .word 0xFEFEFEFE + .word 2b + .word 3b + .word 4b + +@ ---------------------------------------------------------------- +.macro RND_XY2_IT align, rnd + @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202) + @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2) +.if \align == 0 + ldmia r1, {r6-r8} +.elseif \align == 3 + ldmia r1, {r5-r7} +.else + ldmia r1, {r8-r10} +.endif + add r1, r1, r2 + pld [r1] +.if \align == 0 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 +.elseif \align == 1 + ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10 +.elseif \align == 2 + ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10 + ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10 +.elseif \align == 3 + ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7 +.endif + ldr r14, [r12, #0] @ 0x03030303 + tst r3, #1 + and r8, r4, r14 + and r9, r5, r14 + and r10, r6, r14 + and r11, r7, r14 +.if \rnd == 1 + ldreq r14, [r12, #16] @ 0x02020202 +.else + ldreq r14, [r12, #28] @ 0x01010101 +.endif + add r8, r8, r10 + add r9, r9, r11 + addeq r8, r8, r14 + addeq r9, r9, r14 + ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2 + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + and r6, r14, r6, lsr #2 + and r7, r14, r7, lsr #2 + add r10, r4, r6 + add r11, r5, r7 +.endm + +.macro RND_XY2_EXPAND align, rnd + RND_XY2_IT \align, \rnd +6: stmfd sp!, {r8-r11} + RND_XY2_IT \align, \rnd + 
ldmfd sp!, {r4-r7} + add r4, r4, r8 + add r5, r5, r9 + add r6, r6, r10 + add r7, r7, r11 + ldr r14, [r12, #24] @ 0x0F0F0F0F + and r4, r14, r4, lsr #2 + and r5, r14, r5, lsr #2 + add r4, r4, r6 + add r5, r5, r7 + subs r3, r3, #1 + stmia r0, {r4-r5} + add r0, r0, r2 + bne 6b + ldmfd sp!, {r4-r11,pc} +.endm + + .align 8 + .global put_pixels8_xy2_arm +put_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 1 + + .align 8 +2: + RND_XY2_EXPAND 1, 1 + + .align 8 +3: + RND_XY2_EXPAND 2, 1 + + .align 8 +4: + RND_XY2_EXPAND 3, 1 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 + + .align 8 + .global put_no_rnd_pixels8_xy2_arm +put_no_rnd_pixels8_xy2_arm: + @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h) + @ block = word aligned, pixles = unaligned + pld [r1] + stmfd sp!, {r4-r11,lr} @ R14 is also called LR + adrl r12, 5f + ands r4, r1, #3 + add r5, r12, r4, lsl #2 + bic r1, r1, #3 + ldrne pc, [r5] +1: + RND_XY2_EXPAND 0, 0 + + .align 8 +2: + RND_XY2_EXPAND 1, 0 + + .align 8 +3: + RND_XY2_EXPAND 2, 0 + + .align 8 +4: + RND_XY2_EXPAND 3, 0 + +5: + .word 0x03030303 + .word 2b + .word 3b + .word 4b + .word 0x02020202 + .word 0xFCFCFCFC >> 2 + .word 0x0F0F0F0F + .word 0x01010101 diff --git a/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt.c b/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt.c new file mode 100644 index 0000000000000000000000000000000000000000..460b7a84c37f682e98266118e861b078dde4ee24 --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt.c @@ -0,0 +1,186 @@ +/* + * iWMMXt optimized DSP utils + * Copyright (c) 2004 AGAWA Koji + * + * This library is free software; you can redistribute it and/or + 
* modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../dsputil.h" + +#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2b" +#include "dsputil_iwmmxt_rnd.h" /* instantiates the <op>_no_rnd_<name>_iwmmxt functions */ +#undef DEF +#undef SET_RND +#undef WAVG2B + +#define DEF(x, y) x ## _ ## y ##_iwmmxt +#define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12"); +#define WAVG2B "wavg2br" +#include "dsputil_iwmmxt_rnd.h" /* instantiates the <op>_<name>_iwmmxt functions */ +#undef DEF +#undef SET_RND +#undef WAVG2B /* the macro defined for the template is WAVG2B (its value is "wavg2br") */ + +// need scheduling +#define OP(AVG) \ + asm volatile ( \ + /* alignment */ \ + "and r12, %[pixels], #7 \n\t" \ + "bic %[pixels], %[pixels], #7 \n\t" \ + "tmcr wcgr1, r12 \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1 wr4, wr0, wr1 \n\t" \ + \ + "1: \n\t" \ + \ + "wldrd wr2, [%[pixels]] \n\t" \ + "wldrd wr3, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "pld [%[pixels]] \n\t" \ + "walignr1 wr5, wr2, wr3 \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "wldrd wr0, [%[pixels]] \n\t" \ + "wldrd wr1, [%[pixels], #8] \n\t" \ + "add %[pixels], %[pixels], %[line_size] \n\t" \ + "walignr1
wr4, wr0, wr1 \n\t" \ + "pld [%[pixels]] \n\t" \ + AVG " wr6, wr4, wr5 \n\t" \ + "wstrd wr6, [%[block]] \n\t" \ + "add %[block], %[block], %[line_size] \n\t" \ + \ + "subs %[h], %[h], #2 \n\t" \ + "bne 1b \n\t" \ + : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \ + : [line_size]"r"(line_size) \ + : "cc", "memory", "r12"); /* "cc": subs updates the condition flags */ +void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2br"); +} +void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + OP("wavg2b"); +} +#undef OP + +void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + uint8_t *pixels2 = pixels + line_size; + + __asm__ __volatile__ ( + "mov r12, #4 \n\t" + "1: \n\t" + "pld [%[pixels], %[line_size2]] \n\t" + "pld [%[pixels2], %[line_size2]] \n\t" + "wldrd wr4, [%[pixels]] \n\t" + "wldrd wr5, [%[pixels2]] \n\t" + "pld [%[block], #32] \n\t" + "wunpckelub wr6, wr4 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wunpckehub wr7, wr4 \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "wunpckelub wr8, wr5 \n\t" + "wldrd wr2, [%[block], #16] \n\t" + "wunpckehub wr9, wr5 \n\t" + "wldrd wr3, [%[block], #24] \n\t" + "add %[block], %[block], #32 \n\t" + "waddhss wr10, wr0, wr6 \n\t" + "waddhss wr11, wr1, wr7 \n\t" + "waddhss wr12, wr2, wr8 \n\t" + "waddhss wr13, wr3, wr9 \n\t" + "wpackhus wr14, wr10, wr11 \n\t" + "wpackhus wr15, wr12, wr13 \n\t" + "wstrd wr14, [%[pixels]] \n\t" + "add %[pixels], %[pixels], %[line_size2] \n\t" + "subs r12, r12, #1 \n\t" + "wstrd wr15, [%[pixels2]] \n\t" + "add %[pixels2], %[pixels2], %[line_size2] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2) + : [line_size2]"r"(line_size << 1) + : "cc", "memory", "r12"); +} + +static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + return; +} + +int mm_flags; /* multimedia extension flags */ + +int mm_support(void) +{ + return 0; /* TODO, implement proper
detection */ +} + +void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) +{ + mm_flags = mm_support(); + + if (avctx->dsp_mask) { + if (avctx->dsp_mask & FF_MM_FORCE) + mm_flags |= (avctx->dsp_mask & 0xffff); + else + mm_flags &= ~(avctx->dsp_mask & 0xffff); + } + + if (!(mm_flags & MM_IWMMXT)) return; + + c->add_pixels_clamped = add_pixels_clamped_iwmmxt; + + c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt; + c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt; + c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt; + + c->put_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt; + c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt; + c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt; + c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt; + + c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt; + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt; + + c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][0] = 
avg_no_rnd_pixels8_iwmmxt; + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt; + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; +} diff --git a/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt_rnd.h b/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt_rnd.h new file mode 100644 index 0000000000000000000000000000000000000000..ca49a76bdc617ab839337873844e81f384885533 --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/dsputil_iwmmxt_rnd.h @@ -0,0 +1,1093 @@ +void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; /* writable copy: the asm doubles it so each iteration advances two rows */ + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "cc", "memory", "r4", "r5", "r12"); /* "cc": subs updates the condition flags */ +} + +void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]]
\n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr4, [r4, #8] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr0, [%[block]] \n\t" + "wldrd wr2, [r5] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "cc", "memory", "r4", "r5", "r12"); /* "cc": subs updates the condition flags */ +} + +void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + __asm__ __volatile__ ( + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size] \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + :
[block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "cc", "memory", "r4", "r5", "r12"); /* "cc": subs updates the condition flags */ +} + +void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; /* writable copy: the asm doubles it so each iteration advances two rows */ + __asm__ __volatile__ ( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "1: \n\t" + "wldrd wr0, [%[pixels]] \n\t" + "wldrd wr1, [%[pixels], #8] \n\t" + "subs %[h], %[h], #2 \n\t" + "wldrd wr2, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr3, [r4] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr8, wr0, wr1 \n\t" + "wldrd wr4, [r4, #8] \n\t" + "walignr1 wr9, wr1, wr2 \n\t" + "wldrd wr5, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "wldrd wr0, [%[block]] \n\t" + "pld [r4] \n\t" + "wldrd wr1, [%[block], #8] \n\t" + "pld [r4, #32] \n\t" + "wldrd wr2, [r5] \n\t" + "walignr1 wr10, wr3, wr4 \n\t" + "wldrd wr3, [r5, #8] \n\t" + WAVG2B" wr8, wr8, wr0 \n\t" + WAVG2B" wr9, wr9, wr1 \n\t" + WAVG2B" wr10, wr10, wr2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "walignr1 wr11, wr4, wr5 \n\t" + WAVG2B" wr11, wr11, wr3 \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr10, [r5] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "wstrd wr11, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h) + : + : "cc", "memory", "r4", "r5", "r12"); /* "cc": subs updates the condition flags */ +} + +void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6
wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp 
r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wstrd wr0, [%[block]] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 
\n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" + "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr6, wr14 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr2, [r5] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "add r4, %[pixels], %[line_size]\n\t" + "tmcr wcgr2, r12 \n\t" + "add r5, %[block], %[line_size] \n\t" + "mov %[line_size], %[line_size], lsl #1 \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "wldrd wr13, [r4] \n\t" + "pld [%[pixels]] \n\t" 
+ "wldrd wr14, [r4, #8] \n\t" + "pld [%[pixels], #32] \n\t" + "wldrd wr15, [r4, #16] \n\t" + "add r4, r4, %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [r4] \n\t" + "pld [r4, #32] \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "walignr1 wr2, wr13, wr14 \n\t" + "walignr1 wr3, wr14, wr15 \n\t" + "wmoveq wr4, wr11 \n\t" + "wmoveq wr5, wr12 \n\t" + "wmoveq wr6, wr14 \n\t" + "wmoveq wr7, wr15 \n\t" + "walignr2ne wr4, wr10, wr11 \n\t" + "walignr2ne wr5, wr11, wr12 \n\t" + "walignr2ne wr6, wr13, wr14 \n\t" + "walignr2ne wr7, wr14, wr15 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr0, wr0, wr4 \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr1, wr1, wr5 \n\t" + "wldrd wr12, [r5] \n\t" + WAVG2B" wr2, wr2, wr6 \n\t" + "wldrd wr13, [r5, #8] \n\t" + WAVG2B" wr3, wr3, wr7 \n\t" + WAVG2B" wr0, wr0, wr10 \n\t" + WAVG2B" wr1, wr1, wr11 \n\t" + WAVG2B" wr2, wr2, wr12 \n\t" + WAVG2B" wr3, wr3, wr13 \n\t" + "wstrd wr0, [%[block]] \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr1, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wstrd wr2, [r5] \n\t" + "pld [%[block]] \n\t" + "wstrd wr3, [r5, #8] \n\t" + "add r5, r5, %[line_size] \n\t" + "pld [%[block], #32] \n\t" + "pld [r5] \n\t" + "pld [r5, #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + :"r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + + "1: 
\n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "wldrd wr10, [%[block]] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "cc", "memory", "r12"); +} + +void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd 
wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + int stride = line_size; + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "and r12, %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + + "1: \n\t" + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr4, wr10, wr11 \n\t" + "walignr1 wr5, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], 
%[line_size] \n\t" + + "wldrd wr10, [%[pixels]] \n\t" + "wldrd wr11, [%[pixels], #8] \n\t" + "pld [%[block]] \n\t" + "wldrd wr12, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr0, wr10, wr11 \n\t" + "walignr1 wr1, wr11, wr12 \n\t" + "wldrd wr10, [%[block]] \n\t" + "wldrd wr11, [%[block], #8] \n\t" + WAVG2B" wr8, wr0, wr4 \n\t" + WAVG2B" wr9, wr1, wr5 \n\t" + WAVG2B" wr8, wr8, wr10 \n\t" + WAVG2B" wr9, wr9, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride) + : + : "r4", "r5", "r12", "memory"); +} + +void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + 
"wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld 
[%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + 
"waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + "subs %[h], %[h], #2 \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) 
+ : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "add r12, r12, #1 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "tmcr wcgr2, r12 \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "cmp r12, #8 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, 
[%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr13, [%[pixels], #8] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "wmoveq wr10, wr13 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "subs %[h], %[h], #2 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + "wstrd wr8, [%[block]] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} + +void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h) +{ + // [wr0 wr1 wr2 wr3] for previous line + // [wr4 wr5 wr6 wr7] for current line + SET_RND(wr15); // =2 for rnd and =1 for no_rnd version + __asm__ __volatile__( + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "pld [%[pixels]] \n\t" + "mov r12, #2 \n\t" + "pld [%[pixels], #32] \n\t" + "tmcr wcgr0, r12 \n\t" /* for shift value */ + /* alignment */ + "and r12, %[pixels], #7 \n\t" + "bic %[pixels], %[pixels], #7 \n\t" + "tmcr wcgr1, r12 \n\t" + "add r12, r12, #1 \n\t" + "tmcr wcgr2, r12 \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], 
%[pixels], %[line_size] \n\t" + "pld [%[pixels]] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + + "1: \n\t" + // [wr0 wr1 wr2 wr3] + // [wr4 wr5 wr6 wr7] <= * + "wldrd wr12, [%[pixels]] \n\t" + "cmp r12, #8 \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr6, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr7, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr4, wr6 \n\t" + "wunpckehub wr5, wr6 \n\t" + "wunpckelub wr6, wr7 \n\t" + "wunpckehub wr7, wr7 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr4, wr4, wr8 \n\t" + "waddhus wr5, wr5, wr9 \n\t" + "waddhus wr6, wr6, wr10 \n\t" + "waddhus wr7, wr7, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + 
"wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + + // [wr0 wr1 wr2 wr3] <= * + // [wr4 wr5 wr6 wr7] + "wldrd wr12, [%[pixels]] \n\t" + "pld [%[block]] \n\t" + "wldrd wr13, [%[pixels], #8] \n\t" + "pld [%[block], #32] \n\t" + "wldrd wr14, [%[pixels], #16] \n\t" + "add %[pixels], %[pixels], %[line_size] \n\t" + "walignr1 wr2, wr12, wr13 \n\t" + "pld [%[pixels]] \n\t" + "pld [%[pixels], #32] \n\t" + "walignr1 wr3, wr13, wr14 \n\t" + "wmoveq wr10, wr13 \n\t" + "wmoveq wr11, wr14 \n\t" + "walignr2ne wr10, wr12, wr13 \n\t" + "walignr2ne wr11, wr13, wr14 \n\t" + "wunpckelub wr0, wr2 \n\t" + "wunpckehub wr1, wr2 \n\t" + "wunpckelub wr2, wr3 \n\t" + "wunpckehub wr3, wr3 \n\t" + "wunpckelub wr8, wr10 \n\t" + "wunpckehub wr9, wr10 \n\t" + "wunpckelub wr10, wr11 \n\t" + "wunpckehub wr11, wr11 \n\t" + "waddhus wr0, wr0, wr8 \n\t" + "waddhus wr1, wr1, wr9 \n\t" + "waddhus wr2, wr2, wr10 \n\t" + "waddhus wr3, wr3, wr11 \n\t" + "waddhus wr8, wr0, wr4 \n\t" + "waddhus wr9, wr1, wr5 \n\t" + "waddhus wr10, wr2, wr6 \n\t" + "waddhus wr11, wr3, wr7 \n\t" + "waddhus wr8, wr8, wr15 \n\t" + "waddhus wr9, wr9, wr15 \n\t" + "waddhus wr10, wr10, wr15 \n\t" + "waddhus wr11, wr11, wr15 \n\t" + "wsrlhg wr8, wr8, wcgr0 \n\t" + "wsrlhg wr9, wr9, wcgr0 \n\t" + "wldrd wr12, [%[block]] \n\t" + "wldrd wr13, [%[block], #8] \n\t" + "wsrlhg wr10, wr10, wcgr0 \n\t" + "wsrlhg wr11, wr11, wcgr0 \n\t" + "wpackhus wr8, wr8, wr9 \n\t" + "wpackhus wr9, wr10, wr11 \n\t" + WAVG2B" wr8, wr8, wr12 \n\t" + WAVG2B" wr9, wr9, wr13 \n\t" + "wstrd wr8, [%[block]] \n\t" + "wstrd wr9, [%[block], #8] \n\t" + "add %[block], %[block], %[line_size] \n\t" + "subs %[h], %[h], #2 \n\t" + "pld [%[block]] \n\t" + "pld [%[block], #32] \n\t" + "bne 1b \n\t" + : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block) + : [line_size]"r"(line_size) + : "r12", "memory"); +} diff 
--git a/mpeg4/src/libavcodec/armv4l/jrevdct_arm.S b/mpeg4/src/libavcodec/armv4l/jrevdct_arm.S new file mode 100644 index 0000000000000000000000000000000000000000..294ea475097bcb162194031f8d225068d28128b2 --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/jrevdct_arm.S @@ -0,0 +1,386 @@ +/* + C-like prototype : + void j_rev_dct_ARM(DCTBLOCK data) + + With DCTBLOCK being a pointer to an array of 64 'signed shorts' + + Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+*/
+#define FIX_0_298631336 2446
+#define FIX_0_541196100 4433
+#define FIX_0_765366865 6270
+#define FIX_1_175875602 9633
+#define FIX_1_501321110 12299
+#define FIX_2_053119869 16819
+#define FIX_3_072711026 25172
+#define FIX_M_0_390180644 -3196
+#define FIX_M_0_899976223 -7373
+#define FIX_M_1_847759065 -15137
+#define FIX_M_1_961570560 -16069
+#define FIX_M_2_562915447 -20995
+#define FIX_0xFFFF 0xFFFF
+
+#define FIX_0_298631336_ID 0
+#define FIX_0_541196100_ID 4
+#define FIX_0_765366865_ID 8
+#define FIX_1_175875602_ID 12
+#define FIX_1_501321110_ID 16
+#define FIX_2_053119869_ID 20
+#define FIX_3_072711026_ID 24
+#define FIX_M_0_390180644_ID 28
+#define FIX_M_0_899976223_ID 32
+#define FIX_M_1_847759065_ID 36
+#define FIX_M_1_961570560_ID 40
+#define FIX_M_2_562915447_ID 44
+#define FIX_0xFFFF_ID 48
+        .text
+        .align
+
+        .global j_rev_dct_ARM
+@ j_rev_dct_ARM: in-place two-pass inverse DCT of the block pointed to by r0.
+@ Pass 1 walks 8 rows of 16 bytes, pass 2 walks 8 columns (2-byte step,
+@ 16-byte row pitch). Elements are 16-bit halfwords; inside a row the byte
+@ offsets 0,2,4,6 hold d0,d2,d4,d6 and 8,10,12,14 hold d1,d3,d5,d7.
+@ r11 = constants table, r12 = loop counter, lr = current row/column,
+@ [sp] = saved block pointer. r4-r12 and lr are saved and restored.
+j_rev_dct_ARM:
+        stmdb sp!, { r4 - r12, lr }     @ all callee saved regs
+
+        sub sp, sp, #4                  @ reserve some space on the stack
+        str r0, [ sp ]                  @ save the DCT pointer to the stack
+
+        mov lr, r0                      @ lr = pointer to the current row
+        mov r12, #8                     @ r12 = row-counter
+        add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
+row_loop:
+        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
+        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
+
+        @ Optimization for row that have all items except the first set to 0
+        @ (this works as the DCTELEMS are always 4-byte aligned)
+        ldr r5, [lr, # 0]
+        ldr r6, [lr, # 4]
+        ldr r3, [lr, # 8]
+        ldr r4, [lr, #12]
+        orr r3, r3, r4
+        orr r3, r3, r6
+        orrs r5, r3, r5
+        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
+        orrs r3, r3, r2                 @ words 1-3 only cover offsets 4..14, so 'd2' (offset 2) must be tested too
+        beq empty_row
+
+        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
+        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
+        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r7, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r7, r3, r7                  @ r7 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r7              @ r6 = tmp2
+        add r5, r0, r4                  @ r5 = tmp0
+        mla r2, r3, r2, r7              @ r2 = tmp3
+        sub r3, r0, r4                  @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13         @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13         @ r2 = tmp13
+        add r4, r6, r3, lsl #13         @ r4 = tmp11
+        rsb r3, r6, r3, lsl #13         @ r3 = tmp12
+
+        stmdb sp!, { r0, r2, r3, r4 }   @ save on the stack tmp10, tmp13, tmp12, tmp11
+
+        ldrsh r3, [lr, #10]             @ r3 = 'd3'
+        ldrsh r5, [lr, #12]             @ r5 = 'd5'
+        ldrsh r7, [lr, #14]             @ r7 = 'd7'
+
+        add r0, r3, r5                  @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6                  @ r8 = z3 + z4
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5, r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 }   @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                                        @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
+        add r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 0]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
+        sub r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #14]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
+        add r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 2]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
+        sub r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #12]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
+        add r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 4]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
+        sub r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #10]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
+        add r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 6]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
+        sub r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 8]
+
+        @ End of row loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+        beq start_column_loop
+
+empty_row:
+        @ Only 'd0' is non-zero: every output of the row is d0 << 2,
+        @ written as four words each holding two copies of the low 16 bits.
+        ldr r1, [r11, #FIX_0xFFFF_ID]
+        mov r0, r0, lsl #2
+        and r0, r0, r1
+        add r0, r0, r0, lsl #16
+        str r0, [lr, # 0]
+        str r0, [lr, # 4]
+        str r0, [lr, # 8]
+        str r0, [lr, #12]
+
+end_of_row_loop:
+        @ End of loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+
+start_column_loop:
+        @ Start of column loop
+        ldr lr, [ sp ]
+        mov r12, #8
+column_loop:
+        ldrsh r0, [lr, #( 0*8)]         @ r0 = 'd0'
+        ldrsh r2, [lr, #( 4*8)]         @ r2 = 'd2'
+        ldrsh r4, [lr, #( 8*8)]         @ r4 = 'd4'
+        ldrsh r6, [lr, #(12*8)]         @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r1, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r1, r3, r1                  @ r1 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r1              @ r6 = tmp2
+        add r5, r0, r4                  @ r5 = tmp0
+        mla r2, r3, r2, r1              @ r2 = tmp3
+        sub r3, r0, r4                  @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13         @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13         @ r2 = tmp13
+        add r4, r6, r3, lsl #13         @ r4 = tmp11
+        rsb r6, r6, r3, lsl #13         @ r6 = tmp12
+
+        ldrsh r1, [lr, #( 2*8)]         @ r1 = 'd1'
+        ldrsh r3, [lr, #( 6*8)]         @ r3 = 'd3'
+        ldrsh r5, [lr, #(10*8)]         @ r5 = 'd5'
+        ldrsh r7, [lr, #(14*8)]         @ r7 = 'd7'
+
+        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
+        orr r9, r1, r3
+        orr r10, r5, r7
+        orrs r10, r9, r10
+        beq empty_odd_column
+
+        stmdb sp!, { r0, r2, r4, r6 }   @ save on the stack tmp10, tmp13, tmp11, tmp12
+
+        add r0, r3, r5                  @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5, r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 }   @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                                        @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        add r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 0*8)]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        sub r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        add r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 2*8)]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        sub r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        add r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 4*8)]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        sub r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        add r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 6*8)]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        sub r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+        beq the_end
+
+empty_odd_column:
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        add r0, r0, #(1<<17)
+        mov r0, r0, asr #18
+        strh r0, [lr, #( 0*8)]
+        strh r0, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        add r4, r4, #(1<<17)
+        mov r4, r4, asr #18
+        strh r4, [lr, #( 2*8)]
+        strh r4, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        add r6, r6, #(1<<17)
+        mov r6, r6, asr #18
+        strh r6, [lr, #( 4*8)]
+        strh r6, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        add r2, r2, #(1<<17)
+        mov r2, r2, asr #18
+        strh r2, [lr, #( 6*8)]
+        strh r2, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+
+the_end:
+        @ The end....
+        add sp, sp, #4
+        ldmia sp!, { r4 - r12, pc }     @ restore callee saved regs and return
+
+const_array:
+        .align
+        .word FIX_0_298631336
+        .word FIX_0_541196100
+        .word FIX_0_765366865
+        .word FIX_1_175875602
+        .word FIX_1_501321110
+        .word FIX_2_053119869
+        .word FIX_3_072711026
+        .word FIX_M_0_390180644
+        .word FIX_M_0_899976223
+        .word FIX_M_1_847759065
+        .word FIX_M_1_961570560
+        .word FIX_M_2_562915447
+        .word FIX_0xFFFF
diff --git a/mpeg4/src/libavcodec/armv4l/mpegvideo_arm.c b/mpeg4/src/libavcodec/armv4l/mpegvideo_arm.c
new file mode 100644
index 0000000000000000000000000000000000000000..263e3c5bc41b0dad9e30323cbfd2d5e0f7d12247
--- /dev/null
+++ b/mpeg4/src/libavcodec/armv4l/mpegvideo_arm.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "../dsputil.h" +#include "../mpegvideo.h" +#include "../avcodec.h" + +extern void MPV_common_init_iwmmxt(MpegEncContext *s); + +void MPV_common_init_armv4l(MpegEncContext *s) +{ +#ifdef HAVE_IWMMXT + MPV_common_init_iwmmxt(s); +#endif +} diff --git a/mpeg4/src/libavcodec/armv4l/mpegvideo_iwmmxt.c b/mpeg4/src/libavcodec/armv4l/mpegvideo_iwmmxt.c new file mode 100644 index 0000000000000000000000000000000000000000..9724de8c4c153fa79096cc91b515af5f2f247e6a --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/mpegvideo_iwmmxt.c @@ -0,0 +1,99 @@ +#include "../dsputil.h" +#include "../mpegvideo.h" +#include "../avcodec.h" + +static void dct_unquantize_h263_intra_iwmmxt(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int level, qmul, qadd; + int nCoeffs; + DCTELEM *block_orig = block; + + assert(s->block_last_index[n]>=0); + + qmul = qscale << 1; + + if (!s->h263_aic) { + if (n < 4) + level = block[0] * s->y_dc_scale; + else + level = block[0] * s->c_dc_scale; + qadd = (qscale - 1) | 1; + }else{ + qadd = 0; + level = block[0]; + } + if(s->ac_pred) + nCoeffs=63; + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + __asm__ __volatile__ ( +/* "movd %1, %%mm6 \n\t" //qmul */ +/* "packssdw %%mm6, %%mm6 \n\t" */ +/* "packssdw %%mm6, %%mm6 \n\t" */ + "tbcsth wr6, %[qmul] \n\t" +/* "movd %2, %%mm5 \n\t" //qadd */ +/* "packssdw %%mm5, %%mm5 \n\t" */ +/* "packssdw %%mm5, %%mm5 \n\t" */ + "tbcsth wr5, %[qadd] \n\t" + "wzero wr7 \n\t" /* "pxor %%mm7, %%mm7 \n\t" */ + "wzero wr4 \n\t" /* "pxor %%mm4, %%mm4 \n\t" */ + "wsubh wr7, wr5, wr7 \n\t" /* "psubw %%mm5, %%mm7 \n\t" */ + "1: \n\t" + "wldrd wr2, [%[block]] \n\t" /* "movq (%0, %3), %%mm0 \n\t" */ + "wldrd wr3, [%[block], #8] \n\t" /* "movq 8(%0, %3), %%mm1 
\n\t" */ + "wmulsl wr0, wr6, wr2 \n\t" /* "pmullw %%mm6, %%mm0 \n\t" */ + "wmulsl wr1, wr6, wr3 \n\t" /* "pmullw %%mm6, %%mm1 \n\t" */ +/* "movq (%0, %3), %%mm2 \n\t" */ +/* "movq 8(%0, %3), %%mm3 \n\t" */ + "wcmpgtsh wr2, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 */ + "wcmpgtsh wr3, wr4, wr2 \n\t" /* "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 */ + "wxor wr0, wr2, wr0 \n\t" /* "pxor %%mm2, %%mm0 \n\t" */ + "wxor wr1, wr3, wr1 \n\t" /* "pxor %%mm3, %%mm1 \n\t" */ + "waddh wr0, wr7, wr0 \n\t" /* "paddw %%mm7, %%mm0 \n\t" */ + "waddh wr1, wr7, wr1 \n\t" /* "paddw %%mm7, %%mm1 \n\t" */ + "wxor wr2, wr0, wr2 \n\t" /* "pxor %%mm0, %%mm2 \n\t" */ + "wxor wr3, wr1, wr3 \n\t" /* "pxor %%mm1, %%mm3 \n\t" */ + "wcmpeqh wr0, wr7, wr0 \n\t" /* "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 */ + "wcmpeqh wr1, wr7, wr1 \n\t" /* "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 */ + "wandn wr0, wr2, wr0 \n\t" /* "pandn %%mm2, %%mm0 \n\t" */ + "wandn wr1, wr3, wr1 \n\t" /* "pandn %%mm3, %%mm1 \n\t" */ + "wstrd wr0, [%[block]] \n\t" /* "movq %%mm0, (%0, %3) \n\t" */ + "wstrd wr1, [%[block], #8] \n\t" /* "movq %%mm1, 8(%0, %3) \n\t" */ + "add %[block], %[block], #16 \n\t" /* "addl $16, %3 \n\t" */ + "subs %[i], %[i], #1 \n\t" + "bne 1b \n\t" /* "jng 1b \n\t" */ + :[block]"+r"(block) + :[i]"r"((nCoeffs + 8) / 8), [qmul]"r"(qmul), [qadd]"r"(qadd) + :"memory"); + + block_orig[0] = level; +} + +#if 0 +static void dct_unquantize_h263_inter_iwmmxt(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + + assert(s->block_last_index[n]>=0); + + if(s->ac_pred) + nCoeffs=63; + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + ippiQuantInvInter_Compact_H263_16s_I(block, nCoeffs+1, qscale); +} +#endif + +void MPV_common_init_iwmmxt(MpegEncContext *s) +{ + if (!(mm_flags & MM_IWMMXT)) return; + + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_iwmmxt; +#if 0 + s->dct_unquantize_h263_inter = 
dct_unquantize_h263_inter_iwmmxt; +#endif +} diff --git a/mpeg4/src/libavcodec/armv4l/simple_idct_arm.S b/mpeg4/src/libavcodec/armv4l/simple_idct_arm.S new file mode 100644 index 0000000000000000000000000000000000000000..43751896d3292a435511fca0811802b43385c985 --- /dev/null +++ b/mpeg4/src/libavcodec/armv4l/simple_idct_arm.S @@ -0,0 +1,485 @@ +/* + * simple_idct_arm.S + * Copyright (C) 2002 Frederic 'dilb' Boulay. + * All Rights Reserved. + * + * Author: Frederic Boulay + * + * You can redistribute this file and/or modify + * it under the terms of the GNU General Public License (version 2) + * as published by the Free Software Foundation. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * + * The function defined in this file, is derived from the simple_idct function + * from the libavcodec library part of the ffmpeg project. 
+ */
+
+/* useful constants for the algorithm, they are saved in __constant_ptr__ at */
+/* the end of the source code.*/
+#define W1 22725
+#define W2 21407
+#define W3 19266
+#define W4 16383
+#define W5 12873
+#define W6 8867
+#define W7 4520
+#define MASK_MSHW 0xFFFF0000
+
+/* offsets of the constants in the vector */
+#define offW1 0
+#define offW2 4
+#define offW3 8
+#define offW4 12
+#define offW5 16
+#define offW6 20
+#define offW7 24
+#define offMASK_MSHW 28
+
+#define ROW_SHIFT 11
+#define ROW_SHIFT2MSHW (16-11)
+#define COL_SHIFT 20
+#define ROW_SHIFTED_1 1024 /* 1<< (ROW_SHIFT-1) */
+#define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
+
+
+        .text
+        .align
+        .global simple_idct_ARM
+
+simple_idct_ARM:
+        @@ void simple_idct_ARM(int16_t *block)
+        @@ In-place two-pass transform of the 8x8 block of 16-bit values:
+        @@ the row pass walks from &block[56] down to block in 16-byte steps,
+        @@ the column pass walks from &block[7] down to block in 2-byte steps.
+        @@ Row results are packed two halfwords per 32-bit store.
+        @@ save stack for reg needed (take all of them),
+        @@ R0-R3 are scratch regs, so no need to save them, but R0 contains the pointer to block
+        @@ so it must not be overwritten, if it is not saved!!
+        @@ R12 is another scratch register, so it should not be saved too
+        @@ save all registers
+        stmfd sp!, {r4-r11, r14} @ R14 is also called LR
+        @@ at this point, R0=block, other registers are free.
+        add r14, r0, #112 @ R14=&block[8*7], better start from the last row, and decrease the value until row=0, i.e. R14=block.
+        add r12, pc, #(__constant_ptr__-.-8) @ R12=__constant_ptr__, the vector containing the constants, probably not necessary to reserve a register for it
+        @@ add 2 temporary variables in the stack: R0 and R14
+        sub sp, sp, #8 @ allow 2 local variables
+        str r0, [sp, #0] @ save block in sp[0]
+        @@ stack status
+        @@ sp+4 free
+        @@ sp+0 R0 (block)
+
+
+        @@ at this point, R0=block, R14=&block[56], R12=__const_ptr_, R1-R11 free
+
+
+__row_loop:
+        @@ read the row and check if it is null, almost null, or not, according to strongarm specs, it is not necessary to optimise ldr accesses (i.e. split 32bits in 2 16bits words), at least it gives more usable registers :)
+        ldr r1, [r14, #0] @ R1=(int32)(R14)[0]=ROWr32[0] (relative row cast to a 32b pointer)
+        ldr r2, [r14, #4] @ R2=(int32)(R14)[1]=ROWr32[1]
+        ldr r3, [r14, #8] @ R3=ROWr32[2]
+        ldr r4, [r14, #12] @ R4=ROWr32[3]
+        @@ check if the words are null, if all of them are null, then proceed with next row (branch __end_row_loop),
+        @@ if ROWr16[0] is the only one not null, then proceed with this special case (branch __almost_empty_row)
+        @@ else follow the complete algorithm.
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@ R3=ROWr32[2], R4=ROWr32[3], R5-R11 free
+        orr r5, r4, r3 @ R5=R4 | R3
+        orr r5, r5, r2 @ R5=R4 | R3 | R2
+        orrs r6, r5, r1 @ Test R5 | R1 (the aim is to check if everything is null)
+        beq __end_row_loop
+        mov r7, r1, asr #16 @ R7=R1>>16=ROWr16[1] (evaluate it now, as it could be useful later)
+        ldrsh r6, [r14, #0] @ R6=ROWr16[0]
+        orrs r5, r5, r7 @ R5=R4 | R3 | R2 | R7
+        beq __almost_empty_row
+
+__b_evaluation:
+        @@ at this point, R0=block (temp), R1(free), R2=ROWr32[1], R3=ROWr32[2], R4=ROWr32[3],
+        @@ R5=(temp), R6=ROWr16[0], R7=ROWr16[1], R8-R11 free,
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ to save some registers/calls, proceed with b0-b3 first, followed by a0-a3
+
+        @@ MUL16(b0, W1, row[1]);
+        @@ MUL16(b1, W3, row[1]);
+        @@ MUL16(b2, W5, row[1]);
+        @@ MUL16(b3, W7, row[1]);
+        @@ MAC16(b0, W3, row[3]);
+        @@ MAC16(b1, -W7, row[3]);
+        @@ MAC16(b2, -W1, row[3]);
+        @@ MAC16(b3, -W5, row[3]);
+        ldr r8, [r12, #offW1] @ R8=W1
+        mov r2, r2, asr #16 @ R2=ROWr16[3]
+        mul r0, r8, r7 @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, [r12, #offW3] @ R9=W3
+        ldr r10, [r12, #offW5] @ R10=W5
+        mul r1, r9, r7 @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, [r12, #offW7] @ R11=W7
+        mul r5, r10, r7 @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        mul r7, r11, r7 @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0 @ if null avoid muls
+        mlane r0, r9, r2, r0 @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0 @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1 @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5 @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r7, r10, r2, r7 @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0, R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@ R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        orrs r2, r3, r4 @ R2=ROWr32[2] | ROWr32[3]
+        beq __end_b_evaluation
+
+        @@ at this point, R0=b0, R1=b1, R2 (free), R3=ROWr32[2], R4=ROWr32[3],
+        @@ R5=b2, R6=ROWr16[0], R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, row[5]);
+        @@ MAC16(b2, W7, row[5]);
+        @@ MAC16(b3, W3, row[5]);
+        @@ MAC16(b1, -W1, row[5]);
+        @@ MAC16(b0, W7, row[7]);
+        @@ MAC16(b2, W3, row[7]);
+        @@ MAC16(b3, -W1, row[7]);
+        @@ MAC16(b1, -W5, row[7]);
+        mov r3, r3, asr #16 @ R3=ROWr16[5]
+        teq r3, #0 @ if null avoid muls
+        mlane r0, r10, r3, r0 @ R0+=W5*ROWr16[5]=b0
+        mov r4, r4, asr #16 @ R4=ROWr16[7]
+        mlane r5, r11, r3, r5 @ R5+=W7*ROWr16[5]=b2
+        mlane r7, r9, r3, r7 @ R7+=W3*ROWr16[5]=b3
+        rsbne r3, r3, #0 @ R3=-ROWr16[5]
+        mlane r1, r8, r3, r1 @ R1-=W1*ROWr16[5]=b1
+        @@ R3 is free now
+        teq r4, #0 @ if null avoid muls
+        mlane r0, r11, r4, r0 @ R0+=W7*ROWr16[7]=b0
+        mlane r5, r9, r4, r5 @ R5+=W3*ROWr16[7]=b2
+        rsbne r4, r4, #0 @ R4=-ROWr16[7]
+        mlane r7, r8, r4, r7 @ R7-=W1*ROWr16[7]=b3
+        mlane r1, r10, r4, r1 @ R1-=W5*ROWr16[7]=b1
+        @@ R4 is free now
+__end_b_evaluation:
+        @@ at this point, R0=b0, R1=b1, R2=ROWr32[2] | ROWr32[3] (tmp), R3 (free), R4 (free),
+        @@ R5=b2, R6=ROWr16[0], R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+
+__a_evaluation:
+        @@ a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldr r9, [r12, #offW4] @ R9=W4
+        mul r6, r9, r6 @ R6=W4*ROWr16[0]
+        ldr r10, [r12, #offW6] @ R10=W6
+        ldrsh r4, [r14, #4] @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #ROW_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(ROW_SHIFT-1) (a0)
+
+        mul r11, r10, r4 @ R11=W6*ROWr16[2]
+        ldr r8, [r12, #offW2] @ R8=W2
+        sub r3, r6, r11 @ R3=a0-W6*ROWr16[2] (a2)
+        @@ temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
+        @@ if (temp != 0) {}
+        teq r2, #0
+        beq __end_bef_a_evaluation
+
+        add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4 @ R11=W2*ROWr16[2]
+        sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
+
+
+        @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
+        @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+
+
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #8] @ R11=ROWr16[4]
+        teq r11, #0 @ if null avoid muls
+        mulne r11, r9, r11 @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        ldrsh r9, [r14, #12] @ R9=ROWr16[6]
+        addne r6, r6, r11 @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11 @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11 @ R3-=W4*ROWr16[4] (a2)
+        addne r4, r4, r11 @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+        teq r9, #0 @ if null avoid muls
+        mulne r11, r10, r9 @ R11=W6*ROWr16[6]
+        addne r6, r6, r11 @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9 @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11 @ R4-=W6*ROWr16[6] (a3)
+        subne r2, r2, r10 @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10 @ R3+=W2*ROWr16[6] (a2)
+
+__end_a_evaluation:
+        @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
+        @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ row[0] = (a0 + b0) >> ROW_SHIFT;
+        @@ row[1] = (a1 + b1) >> ROW_SHIFT;
+        @@ row[2] = (a2 + b2) >> ROW_SHIFT;
+        @@ row[3] = (a3 + b3) >> ROW_SHIFT;
+        @@ row[4] = (a3 - b3) >> ROW_SHIFT;
+        @@ row[5] = (a2 - b2) >> ROW_SHIFT;
+        @@ row[6] = (a1 - b1) >> ROW_SHIFT;
+        @@ row[7] = (a0 - b0) >> ROW_SHIFT;
+        add r8, r6, r0 @ R8=a0+b0
+        add r9, r2, r1 @ R9=a1+b1
+        @@ put 2 16 bits half-words in a 32bits word
+        @@ ROWr32[0]=ROWr16[0] | (ROWr16[1]<<16) (only Little Endian compliant then!!!)
+        ldr r10, [r12, #offMASK_MSHW] @ R10=0xFFFF0000
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a1+b1)<<5)
+        mvn r11, r10 @ R11= NOT R10= 0x0000FFFF
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a0+b0)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #0]
+
+        add r8, r3, r5 @ R8=a2+b2
+        add r9, r4, r7 @ R9=a3+b3
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a3+b3)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a2+b2)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #4]
+
+        sub r8, r4, r7 @ R8=a3-b3
+        sub r9, r3, r5 @ R9=a2-b2
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a2-b2)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a3-b3)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #8]
+
+        sub r8, r2, r1 @ R8=a1-b1
+        sub r9, r6, r0 @ R9=a0-b0
+        and r9, r10, r9, lsl #ROW_SHIFT2MSHW @ R9=0xFFFF0000 & ((a0-b0)<<5)
+        and r8, r11, r8, asr #ROW_SHIFT @ R8=0x0000FFFF & ((a1-b1)>>11)
+        orr r8, r8, r9
+        str r8, [r14, #12]
+
+        bal __end_row_loop
+
+__almost_empty_row:
+        @@ the row was empty, except ROWr16[0], now, management of this special case
+        @@ at this point, R0=block, R14=&block[n], R12=__const_ptr_, R1=ROWr32[0], R2=ROWr32[1],
+        @@ R3=ROWr32[2], R4=ROWr32[3], R5=(temp), R6=ROWr16[0], R7=ROWr16[1],
+        @@ R8=0xFFFF (temp), R9-R11 free
+        mov r8, #0x10000 @ R8=0xFFFF (2 steps needed!) it saves a ldr call (because of delay run).
+        sub r8, r8, #1 @ R8 is now ready.
+        and r5, r8, r6, lsl #3 @ R5=R8 & (R6<<3)= (ROWr16[0]<<3) & 0xFFFF
+        orr r5, r5, r5, lsl #16 @ R5=R5 | (R5<<16)
+        str r5, [r14, #0] @ R14[0]=ROWr32[0]=R5
+        str r5, [r14, #4] @ R14[4]=ROWr32[1]=R5
+        str r5, [r14, #8] @ R14[8]=ROWr32[2]=R5
+        str r5, [r14, #12] @ R14[12]=ROWr32[3]=R5
+
+__end_row_loop:
+        @@ at this point, R0-R11 (free)
+        @@ R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0] @ R0=block
+        teq r0, r14 @ compare current &block[8*n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #16
+        bne __row_loop
+
+
+
+        @@ at this point, R0=block, R1-R11 (free)
+        @@ R12=__const_ptr_, R14=&block[n]
+        add r14, r0, #14 @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
+__col_loop:
+
+__b_evaluation2:
+        @@ at this point, R0=block (temp), R1-R11 (free)
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ proceed with b0-b3 first, followed by a0-a3
+        @@ MUL16(b0, W1, col[8x1]);
+        @@ MUL16(b1, W3, col[8x1]);
+        @@ MUL16(b2, W5, col[8x1]);
+        @@ MUL16(b3, W7, col[8x1]);
+        @@ MAC16(b0, W3, col[8x3]);
+        @@ MAC16(b1, -W7, col[8x3]);
+        @@ MAC16(b2, -W1, col[8x3]);
+        @@ MAC16(b3, -W5, col[8x3]);
+        ldr r8, [r12, #offW1] @ R8=W1
+        ldrsh r7, [r14, #16] @ R7=COLr16[8x1]
+        mul r0, r8, r7 @ R0=W1*COLr16[8x1]=b0 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, [r12, #offW3] @ R9=W3
+        ldr r10, [r12, #offW5] @ R10=W5
+        mul r1, r9, r7 @ R1=W3*COLr16[8x1]=b1 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, [r12, #offW7] @ R11=W7
+        mul r5, r10, r7 @ R5=W5*COLr16[8x1]=b2 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        ldrsh r2, [r14, #48] @ R2=COLr16[8x3]
+        mul r7, r11, r7 @ R7=W7*COLr16[8x1]=b3 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0 @ if 0, then avoid muls
+        mlane r0, r9, r2, r0 @ R0+=W3*COLr16[8x3]=b0 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0 @ R2=-COLr16[8x3]
+        mlane r1, r11, r2, r1 @ R1-=W7*COLr16[8x3]=b1 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5 @ R5-=W1*COLr16[8x3]=b2 (the column value must be the second arg, to have the possibility to save 1 cycle)
+        mlane r7, r10, r2, r7 @ R7-=W5*COLr16[8x3]=b3 (the column value must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
+        @@ R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, col[5x8]);
+        @@ MAC16(b2, W7, col[5x8]);
+        @@ MAC16(b3, W3, col[5x8]);
+        @@ MAC16(b1, -W1, col[5x8]);
+        @@ MAC16(b0, W7, col[7x8]);
+        @@ MAC16(b2, W3, col[7x8]);
+        @@ MAC16(b3, -W1, col[7x8]);
+        @@ MAC16(b1, -W5, col[7x8]);
+        ldrsh r3, [r14, #80] @ R3=COLr16[5x8]
+        teq r3, #0 @ if 0 then avoid muls
+        mlane r0, r10, r3, r0 @ R0+=W5*COLr16[5x8]=b0
+        mlane r5, r11, r3, r5 @ R5+=W7*COLr16[5x8]=b2
+        mlane r7, r9, r3, r7 @ R7+=W3*COLr16[5x8]=b3
+        rsbne r3, r3, #0 @ R3=-COLr16[5x8]
+        ldrsh r4, [r14, #112] @ R4=COLr16[7x8]
+        mlane r1, r8, r3, r1 @ R1-=W1*COLr16[5x8]=b1
+        @@ R3 is free now
+        teq r4, #0 @ if 0 then avoid muls
+        mlane r0, r11, r4, r0 @ R0+=W7*COLr16[7x8]=b0
+        mlane r5, r9, r4, r5 @ R5+=W3*COLr16[7x8]=b2
+        rsbne r4, r4, #0 @ R4=-COLr16[7x8]
+        mlane r7, r8, r4, r7 @ R7-=W1*COLr16[7x8]=b3
+        mlane r1, r10, r4, r1 @ R1-=W5*COLr16[7x8]=b1
+        @@ R4 is free now
+__end_b_evaluation2:
+        @@ at this point, R0=b0, R1=b1, R2 (free), R3 (free), R4 (free),
+        @@ R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+
+__a_evaluation2:
+        @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
+        @@ a1 = a0 + W6 * col[8x2];
+        @@ a2 = a0 - W6 * col[8x2];
+        @@ a3 = a0 - W2 * col[8x2];
+        @@ a0 = a0 + W2 * col[8x2];
+        ldrsh r6, [r14, #0] @ R6=COLr16[8x0]
+        ldr r9, [r12, #offW4] @ R9=W4
+        mul r6, r9, r6 @ R6=W4*COLr16[8x0]
+        ldr r10, [r12, #offW6] @ R10=W6
+        ldrsh r4, [r14, #32] @ R4=COLr16[8x2] (a3 not defined yet)
+        add r6, r6, #COL_SHIFTED_1 @ R6=W4*COLr16[8x0] + 1<<(COL_SHIFT-1) (a0)
+        mul r11, r10, r4 @ R11=W6*COLr16[8x2]
+        ldr r8, [r12, #offW2] @ R8=W2
+        add r2, r6, r11 @ R2=a0+W6*COLr16[8x2] (a1)
+        sub r3, r6, r11 @ R3=a0-W6*COLr16[8x2] (a2)
+        mul r11, r8, r4 @ R11=W2*COLr16[8x2]
+        sub r4, r6, r11 @ R4=a0-W2*COLr16[8x2] (a3)
+        add r6, r6, r11 @ R6=a0+W2*COLr16[8x2] (a0)
+
+        @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
+        @@ R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ a0 += W4*col[8x4]
+        @@ a1 -= W4*col[8x4]
+        @@ a2 -= W4*col[8x4]
+        @@ a3 += W4*col[8x4]
+        ldrsh r11, [r14, #64] @ R11=COLr16[8x4]
+        teq r11, #0 @ if null avoid muls
+        mulne r11, r9, r11 @ R11=W4*COLr16[8x4]
+        @@ R9 is free now
+        addne r6, r6, r11 @ R6+=W4*COLr16[8x4] (a0)
+        subne r2, r2, r11 @ R2-=W4*COLr16[8x4] (a1)
+        subne r3, r3, r11 @ R3-=W4*COLr16[8x4] (a2)
+        ldrsh r9, [r14, #96] @ R9=COLr16[8x6]
+        addne r4, r4, r11 @ R4+=W4*COLr16[8x4] (a3)
+        @@ W6 alone is no more useful, save W2*COLr16[8x6] in it instead
+        teq r9, #0 @ if null avoid muls
+        mulne r11, r10, r9 @ R11=W6*COLr16[8x6]
+        addne r6, r6, r11 @ R6+=W6*COLr16[8x6] (a0)
+        mulne r10, r8, r9 @ R10=W2*COLr16[8x6]
+        @@ a0 += W6*col[8x6];
+        @@ a3 -= W6*col[8x6];
+        @@ a1 -= W2*col[8x6];
+        @@ a2 += W2*col[8x6];
+        subne r4, r4, r11 @ R4-=W6*COLr16[8x6] (a3)
+        subne r2, r2, r10 @ R2-=W2*COLr16[8x6] (a1)
+        addne r3, r3, r10 @ R3+=W2*COLr16[8x6] (a2)
+__end_a_evaluation2:
+        @@ at this point, R0=b0, R1=b1, R2=a1, R3=a2, R4=a3,
+        @@ R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@ R12=__const_ptr_, R14=&block[n]
+        @@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
+        @@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
+        @@ col[16] = ((a2 + b2) >> COL_SHIFT);
+        @@ col[24] = ((a3 + b3) >> COL_SHIFT);
+        @@ col[32] = ((a3 - b3) >> COL_SHIFT);
+        @@ col[40] = ((a2 - b2) >> COL_SHIFT);
+        @@ col[48] = ((a1 - b1) >> COL_SHIFT);
+        @@ col[56] = ((a0 - b0) >> COL_SHIFT);
+        @@@@@ no optimisation here @@@@@
+        add r8, r6, r0 @ R8=a0+b0
+        add r9, r2, r1 @ R9=a1+b1
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #0]
+        strh r9, [r14, #16]
+        add r8, r3, r5 @ R8=a2+b2
+        add r9, r4, r7 @ R9=a3+b3
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #32]
+        strh r9, [r14, #48]
+        sub r8, r4, r7 @ R8=a3-b3
+        sub r9, r3, r5 @ R9=a2-b2
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #64]
+        strh r9, [r14, #80]
+        sub r8, r2, r1 @ R8=a1-b1
+        sub r9, r6, r0 @ R9=a0-b0
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #96]
+        strh r9, [r14, #112]
+
+__end_col_loop:
+        @@ at this point, R0-R11 (free)
+        @@ R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0] @ R0=block
+        teq r0, r14 @ compare current &block[n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #2
+        bne __col_loop
+
+
+
+
+__end_simple_idct_ARM:
+        @@ restore registers to previous status!
+        add sp, sp, #8 @@ the local variables!
+        ldmfd sp!, {r4-r11, r15} @@ update PC with LR content.
+
+
+
+@@ kind of sub-function, here not to overload the common case.
+@@ reached from __a_evaluation when ROWr32[2] | ROWr32[3] is zero: finishes
+@@ a1, a3 and a0 from row[2] only, then rejoins at __end_a_evaluation.
+__end_bef_a_evaluation:
+        add r2, r6, r11 @ R2=a0+W6*ROWr16[2] (a1)
+        mul r11, r8, r4 @ R11=W2*ROWr16[2]
+        sub r4, r6, r11 @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11 @ R6=a0+W2*ROWr16[2] (a0)
+        bal __end_a_evaluation
+
+
+__constant_ptr__: @@ see #defines at the beginning of the source code for values.
+        .align
+        .word W1
+        .word W2
+        .word W3
+        .word W4
+        .word W5
+        .word W6
+        .word W7
+        .word MASK_MSHW
diff --git a/mpeg4/src/libavcodec/asv1.c b/mpeg4/src/libavcodec/asv1.c
new file mode 100644
index 0000000000000000000000000000000000000000..3cfb76e65e188f4dbe2d472f8370807879a3a081
--- /dev/null
+++ b/mpeg4/src/libavcodec/asv1.c
@@ -0,0 +1,647 @@
+/*
+ * ASUS V1/V2 codec
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file asv1.c
+ * ASUS V1/V2 codec.
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "mpegvideo.h"
+
+//#undef NDEBUG
+//#include
+
+#define VLC_BITS 6
+#define ASV2_LEVEL_VLC_BITS 10
+
+/** Per-codec state shared by the block readers and writers in this file. */
+typedef struct ASV1Context{
+    AVCodecContext *avctx;
+    DSPContext dsp;
+    AVFrame picture;
+    PutBitContext pb;
+    GetBitContext gb;
+    ScanTable scantable;
+    int inv_qscale;
+    int mb_width;
+    int mb_height;
+    int mb_width2;
+    int mb_height2;
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]);
+    DECLARE_ALIGNED_8(int, q_intra_matrix[64]);
+    uint8_t *bitstream_buffer;
+    unsigned int bitstream_buffer_size;
+} ASV1Context;
+
+static const uint8_t scantab[64]={
+    0x00,0x08,0x01,0x09,0x10,0x18,0x11,0x19,
+    0x02,0x0A,0x03,0x0B,0x12,0x1A,0x13,0x1B,
+    0x04,0x0C,0x05,0x0D,0x20,0x28,0x21,0x29,
+    0x06,0x0E,0x07,0x0F,0x14,0x1C,0x15,0x1D,
+    0x22,0x2A,0x23,0x2B,0x30,0x38,0x31,0x39,
+    0x16,0x1E,0x17,0x1F,0x24,0x2C,0x25,0x2D,
+    0x32,0x3A,0x33,0x3B,0x26,0x2E,0x27,0x2F,
+    0x34,0x3C,0x35,0x3D,0x36,0x3E,0x37,0x3F,
+};
+
+
+/* The VLC tables below are {code, length} pairs: init_vlcs() passes column 1
+ * as the bit lengths and column 0 as the codes. */
+static const uint8_t ccp_tab[17][2]={
+    {0x2,2}, {0x7,5}, {0xB,5}, {0x3,5},
+    {0xD,5}, {0x5,5}, {0x9,5}, {0x1,5},
+    {0xE,5}, {0x6,5}, {0xA,5}, {0x2,5},
+    {0xC,5}, {0x4,5}, {0x8,5}, {0x3,2},
+    {0xF,5}, //EOB
+};
+
+static const uint8_t level_tab[7][2]={
+    {3,4}, {3,3}, {3,2}, {0,3}, {2,2}, {2,3}, {2,4}
+};
+
+static const uint8_t dc_ccp_tab[8][2]={
+    {0x1,2}, {0xD,4}, {0xF,4}, {0xC,4},
+    {0x5,3}, {0xE,4}, {0x4,3}, {0x0,2},
+};
+
+static const uint8_t ac_ccp_tab[16][2]={
+    {0x00,2}, {0x3B,6}, {0x0A,4}, {0x3A,6},
+    {0x02,3}, {0x39,6}, {0x3C,6}, {0x38,6},
+    {0x03,3}, {0x3D,6}, {0x08,4}, {0x1F,5},
+    {0x09,4}, {0x0B,4}, {0x0D,4}, {0x0C,4},
+};
+
+static const uint8_t asv2_level_tab[63][2]={
+    {0x3F,10},{0x2F,10},{0x37,10},{0x27,10},{0x3B,10},{0x2B,10},{0x33,10},{0x23,10},
+    {0x3D,10},{0x2D,10},{0x35,10},{0x25,10},{0x39,10},{0x29,10},{0x31,10},{0x21,10},
+    {0x1F, 8},{0x17, 8},{0x1B, 8},{0x13, 8},{0x1D, 8},{0x15, 8},{0x19, 8},{0x11, 8},
+    {0x0F, 6},{0x0B, 6},{0x0D, 6},{0x09, 6},
+    {0x07, 4},{0x05, 4},
+    {0x03, 2},
+    {0x00, 5},
+    {0x02, 2},
+    {0x04, 4},{0x06, 4},
+    {0x08, 6},{0x0C, 6},{0x0A, 6},{0x0E, 6},
+    {0x10, 8},{0x18, 8},{0x14, 8},{0x1C, 8},{0x12, 8},{0x1A, 8},{0x16, 8},{0x1E, 8},
+    {0x20,10},{0x30,10},{0x28,10},{0x38,10},{0x24,10},{0x34,10},{0x2C,10},{0x3C,10},
+    {0x22,10},{0x32,10},{0x2A,10},{0x3A,10},{0x26,10},{0x36,10},{0x2E,10},{0x3E,10},
+};
+
+
+static VLC ccp_vlc;
+static VLC level_vlc;
+static VLC dc_ccp_vlc;
+static VLC ac_ccp_vlc;
+static VLC asv2_level_vlc;
+
+/**
+ * Build the five static VLC tables from the {code, length} arrays above.
+ * Guarded by a function-local static flag, so only the first call does work.
+ * The context argument is not used by the body.
+ */
+static void init_vlcs(ASV1Context *a){
+    static int done = 0;
+
+    if (!done) {
+        done = 1;
+
+        init_vlc(&ccp_vlc, VLC_BITS, 17,
+                 &ccp_tab[0][1], 2, 1,
+                 &ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&dc_ccp_vlc, VLC_BITS, 8,
+                 &dc_ccp_tab[0][1], 2, 1,
+                 &dc_ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&ac_ccp_vlc, VLC_BITS, 16,
+                 &ac_ccp_tab[0][1], 2, 1,
+                 &ac_ccp_tab[0][0], 2, 1, 1);
+        init_vlc(&level_vlc, VLC_BITS, 7,
+                 &level_tab[0][1], 2, 1,
+                 &level_tab[0][0], 2, 1, 1);
+        init_vlc(&asv2_level_vlc, ASV2_LEVEL_VLC_BITS, 63,
+                 &asv2_level_tab[0][1], 2, 1,
+                 &asv2_level_tab[0][0], 2, 1, 1);
+    }
+}
+
+//FIXME write a reversed bitstream reader to avoid the double reverse
+/** Read n bits and return them bit-reversed: the value is shifted up to the top of a byte, then looked up in ff_reverse. */
+static inline int asv2_get_bits(GetBitContext *gb, int n){
+    return ff_reverse[ get_bits(gb, n) << (8-n) ];
+}
+
+/** Write the n-bit value v bit-reversed; counterpart of asv2_get_bits(). */
+static inline void asv2_put_bits(PutBitContext *pb, int n, int v){
+    put_bits(pb, n, ff_reverse[ v << (8-n) ]);
+}
+
+/** Read one ASV1 level: VLC index 3 is the escape and is followed by a signed 8-bit value, any other index maps to index - 3. */
+static inline int asv1_get_level(GetBitContext *gb){
+    int code= get_vlc2(gb, level_vlc.table, VLC_BITS, 1);
+
+    if(code==3) return get_sbits(gb, 8);
+    else return code - 3;
+}
+
+/** Read one ASV2 level: VLC index 31 is the escape and is followed by a bit-reversed 8-bit value cast to int8_t, any other index maps to index - 31. */
+static inline int asv2_get_level(GetBitContext *gb){
+    int code= get_vlc2(gb, asv2_level_vlc.table, ASV2_LEVEL_VLC_BITS, 1);
+
+    if(code==31) return (int8_t)asv2_get_bits(gb, 8);
+    else return code - 31;
+}
+
+/** Write one ASV1 level: levels -3..3 use their table code directly, anything else is written as the escape code (entry 3) plus the low 8 bits. */
+static inline void asv1_put_level(PutBitContext *pb, int level){
+    unsigned int index= level + 3; /* unsigned, so negative sums wrap and fail the range test */
+
+    if(index <= 6) put_bits(pb, level_tab[index][1], level_tab[index][0]);
+    else{
+        put_bits(pb, level_tab[3][1], level_tab[3][0]);
+        put_bits(pb, 8, level&0xFF);
+    }
+}
+
+/** Write one ASV2 level: levels -31..31 use their table code directly, anything else is written as the escape code (entry 31) plus the bit-reversed low 8 bits. */
+static inline void asv2_put_level(PutBitContext *pb, int level){
+    unsigned int index= level + 31; /* unsigned, so negative sums wrap and fail the range test */
+
+    if(index <= 62) put_bits(pb, asv2_level_tab[index][1], asv2_level_tab[index][0]);
+    else{
+        put_bits(pb, asv2_level_tab[31][1], asv2_level_tab[31][0]);
+        asv2_put_bits(pb, 8, level&0xFF);
+    }
+}
+
+/**
+ * Decode one ASV1 block into block[].
+ * The DC value is 8 times an 8-bit field. Each following pattern code covers
+ * four coefficients: bits 8/4/2/1 say which of them carry a level, and each
+ * level is scaled by intra_matrix and shifted right by 4.
+ * Pattern 16 ends the block.
+ * @return 0 on success, -1 when the pattern code is negative or a non-zero,
+ *         non-terminating pattern is read at i >= 10
+ */
+static inline int asv1_decode_block(ASV1Context *a, DCTELEM block[64]){
+    int i;
+
+    block[0]= 8*get_bits(&a->gb, 8);
+
+    for(i=0; i<11; i++){
+        const int ccp= get_vlc2(&a->gb, ccp_vlc.table, VLC_BITS, 1);
+
+        if(ccp){
+            if(ccp == 16) break;
+            if(ccp < 0 || i>=10){
+                av_log(a->avctx, AV_LOG_ERROR, "coded coeff pattern damaged\n");
+                return -1;
+            }
+
+            if(ccp&8) block[a->scantable.permutated[4*i+0]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+0])>>4;
+            if(ccp&4) block[a->scantable.permutated[4*i+1]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+1])>>4;
+            if(ccp&2) block[a->scantable.permutated[4*i+2]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+2])>>4;
+            if(ccp&1) block[a->scantable.permutated[4*i+3]]= (asv1_get_level(&a->gb) * a->intra_matrix[4*i+3])>>4;
+        }
+    }
+
+    return 0;
+}
+
+static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64]){
+    int i, count, ccp;
+
+    count= asv2_get_bits(&a->gb, 4);
+
+    block[0]= 8*asv2_get_bits(&a->gb, 8);
+
+    ccp= get_vlc2(&a->gb, dc_ccp_vlc.table, VLC_BITS, 1);
+    if(ccp){
+        if(ccp&4) block[a->scantable.permutated[1]]= (asv2_get_level(&a->gb) * a->intra_matrix[1])>>4;
+        if(ccp&2) block[a->scantable.permutated[2]]= (asv2_get_level(&a->gb) * a->intra_matrix[2])>>4;
+        if(ccp&1) block[a->scantable.permutated[3]]= (asv2_get_level(&a->gb) * a->intra_matrix[3])>>4;
+    }
+
+    for(i=1; igb, ac_ccp_vlc.table, VLC_BITS, 1);
+
+        if(ccp){
+            if(ccp&8) block[a->scantable.permutated[4*i+0]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+0])>>4;
+            if(ccp&4) block[a->scantable.permutated[4*i+1]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+1])>>4;
+            if(ccp&2) 
block[a->scantable.permutated[4*i+2]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+2])>>4; + if(ccp&1) block[a->scantable.permutated[4*i+3]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+3])>>4; + } + } + + return 0; +} + +static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){ + int i; + int nc_count=0; + + put_bits(&a->pb, 8, (block[0] + 32)>>6); + block[0]= 0; + + for(i=0; i<10; i++){ + const int index= scantab[4*i]; + int ccp=0; + + if( (block[index + 0] = (block[index + 0]*a->q_intra_matrix[index + 0] + (1<<15))>>16) ) ccp |= 8; + if( (block[index + 8] = (block[index + 8]*a->q_intra_matrix[index + 8] + (1<<15))>>16) ) ccp |= 4; + if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2; + if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1; + + if(ccp){ + for(;nc_count; nc_count--) + put_bits(&a->pb, ccp_tab[0][1], ccp_tab[0][0]); + + put_bits(&a->pb, ccp_tab[ccp][1], ccp_tab[ccp][0]); + + if(ccp&8) asv1_put_level(&a->pb, block[index + 0]); + if(ccp&4) asv1_put_level(&a->pb, block[index + 8]); + if(ccp&2) asv1_put_level(&a->pb, block[index + 1]); + if(ccp&1) asv1_put_level(&a->pb, block[index + 9]); + }else{ + nc_count++; + } + } + put_bits(&a->pb, ccp_tab[16][1], ccp_tab[16][0]); +} + +static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){ + int i; + int count=0; + + for(count=63; count>3; count--){ + const int index= scantab[count]; + + if( (block[index]*a->q_intra_matrix[index] + (1<<15))>>16 ) + break; + } + + count >>= 2; + + asv2_put_bits(&a->pb, 4, count); + asv2_put_bits(&a->pb, 8, (block[0] + 32)>>6); + block[0]= 0; + + for(i=0; i<=count; i++){ + const int index= scantab[4*i]; + int ccp=0; + + if( (block[index + 0] = (block[index + 0]*a->q_intra_matrix[index + 0] + (1<<15))>>16) ) ccp |= 8; + if( (block[index + 8] = (block[index + 8]*a->q_intra_matrix[index + 8] + (1<<15))>>16) ) ccp |= 4; + if( (block[index + 1] = (block[index 
+ 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2; + if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1; + + assert(i || ccp<8); + if(i) put_bits(&a->pb, ac_ccp_tab[ccp][1], ac_ccp_tab[ccp][0]); + else put_bits(&a->pb, dc_ccp_tab[ccp][1], dc_ccp_tab[ccp][0]); + + if(ccp){ + if(ccp&8) asv2_put_level(&a->pb, block[index + 0]); + if(ccp&4) asv2_put_level(&a->pb, block[index + 8]); + if(ccp&2) asv2_put_level(&a->pb, block[index + 1]); + if(ccp&1) asv2_put_level(&a->pb, block[index + 9]); + } + } +} + +static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){ + int i; + + a->dsp.clear_blocks(block[0]); + + if(a->avctx->codec_id == CODEC_ID_ASV1){ + for(i=0; i<6; i++){ + if( asv1_decode_block(a, block[i]) < 0) + return -1; + } + }else{ + for(i=0; i<6; i++){ + if( asv2_decode_block(a, block[i]) < 0) + return -1; + } + } + return 0; +} + +static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){ + int i; + + if(a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb)>>3) < 30*16*16*3/2/8){ + av_log(a->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + + if(a->avctx->codec_id == CODEC_ID_ASV1){ + for(i=0; i<6; i++) + asv1_encode_block(a, block[i]); + }else{ + for(i=0; i<6; i++) + asv2_encode_block(a, block[i]); + } + return 0; +} + +static inline void idct_put(ASV1Context *a, int mb_x, int mb_y){ + DCTELEM (*block)[64]= a->block; + int linesize= a->picture.linesize[0]; + + uint8_t *dest_y = a->picture.data[0] + (mb_y * 16* linesize ) + mb_x * 16; + uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8; + uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8; + + a->dsp.idct_put(dest_y , linesize, block[0]); + a->dsp.idct_put(dest_y + 8, linesize, block[1]); + a->dsp.idct_put(dest_y + 8*linesize , linesize, block[2]); + a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]); + + 
if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ + a->dsp.idct_put(dest_cb, a->picture.linesize[1], block[4]); + a->dsp.idct_put(dest_cr, a->picture.linesize[2], block[5]); + } +} + +static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){ + DCTELEM (*block)[64]= a->block; + int linesize= a->picture.linesize[0]; + int i; + + uint8_t *ptr_y = a->picture.data[0] + (mb_y * 16* linesize ) + mb_x * 16; + uint8_t *ptr_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8; + uint8_t *ptr_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8; + + a->dsp.get_pixels(block[0], ptr_y , linesize); + a->dsp.get_pixels(block[1], ptr_y + 8, linesize); + a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); + a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); + for(i=0; i<4; i++) + a->dsp.fdct(block[i]); + + if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ + a->dsp.get_pixels(block[4], ptr_cb, a->picture.linesize[1]); + a->dsp.get_pixels(block[5], ptr_cr, a->picture.linesize[2]); + for(i=4; i<6; i++) + a->dsp.fdct(block[i]); + } +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + ASV1Context * const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame * const p= (AVFrame*)&a->picture; + int mb_x, mb_y; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->pict_type= I_TYPE; + p->key_frame= 1; + + a->bitstream_buffer= av_fast_realloc(a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE); + + if(avctx->codec_id == CODEC_ID_ASV1) + a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (uint32_t*)buf, buf_size/4); + else{ + int i; + for(i=0; ibitstream_buffer[i]= ff_reverse[ buf[i] ]; + } + + init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8); + + for(mb_y=0; mb_ymb_height2; mb_y++){ + for(mb_x=0; 
mb_xmb_width2; mb_x++){ + if( decode_mb(a, a->block) <0) + return -1; + + idct_put(a, mb_x, mb_y); + } + } + + if(a->mb_width2 != a->mb_width){ + mb_x= a->mb_width2; + for(mb_y=0; mb_ymb_height2; mb_y++){ + if( decode_mb(a, a->block) <0) + return -1; + + idct_put(a, mb_x, mb_y); + } + } + + if(a->mb_height2 != a->mb_height){ + mb_y= a->mb_height2; + for(mb_x=0; mb_xmb_width; mb_x++){ + if( decode_mb(a, a->block) <0) + return -1; + + idct_put(a, mb_x, mb_y); + } + } +#if 0 +int i; +printf("%d %d\n", 8*buf_size, get_bits_count(&a->gb)); +for(i=get_bits_count(&a->gb); i<8*buf_size; i++){ + printf("%d", get_bits1(&a->gb)); +} + +for(i=0; iavctx->extradata_size; i++){ + printf("%c\n", ((uint8_t*)s->avctx->extradata)[i]); +} +#endif + + *picture= *(AVFrame*)&a->picture; + *data_size = sizeof(AVPicture); + + emms_c(); + + return (get_bits_count(&a->gb)+31)/32*4; +} + +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + ASV1Context * const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame * const p= (AVFrame*)&a->picture; + int size; + int mb_x, mb_y; + + init_put_bits(&a->pb, buf, buf_size); + + *p = *pict; + p->pict_type= I_TYPE; + p->key_frame= 1; + + for(mb_y=0; mb_ymb_height2; mb_y++){ + for(mb_x=0; mb_xmb_width2; mb_x++){ + dct_get(a, mb_x, mb_y); + encode_mb(a, a->block); + } + } + + if(a->mb_width2 != a->mb_width){ + mb_x= a->mb_width2; + for(mb_y=0; mb_ymb_height2; mb_y++){ + dct_get(a, mb_x, mb_y); + encode_mb(a, a->block); + } + } + + if(a->mb_height2 != a->mb_height){ + mb_y= a->mb_height2; + for(mb_x=0; mb_xmb_width; mb_x++){ + dct_get(a, mb_x, mb_y); + encode_mb(a, a->block); + } + } + emms_c(); + + align_put_bits(&a->pb); + while(put_bits_count(&a->pb)&31) + put_bits(&a->pb, 8, 0); + + size= put_bits_count(&a->pb)/32; + + if(avctx->codec_id == CODEC_ID_ASV1) + a->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size); + else{ + int i; + for(i=0; i<4*size; i++) + buf[i]= ff_reverse[ buf[i] ]; + } + + return 
size*4; +} + +static void common_init(AVCodecContext *avctx){ + ASV1Context * const a = avctx->priv_data; + + dsputil_init(&a->dsp, avctx); + + a->mb_width = (avctx->width + 15) / 16; + a->mb_height = (avctx->height + 15) / 16; + a->mb_width2 = (avctx->width + 0) / 16; + a->mb_height2 = (avctx->height + 0) / 16; + + avctx->coded_frame= (AVFrame*)&a->picture; + a->avctx= avctx; +} + +static int decode_init(AVCodecContext *avctx){ + ASV1Context * const a = avctx->priv_data; + AVFrame *p= (AVFrame*)&a->picture; + int i; + const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 1 : 2; + + common_init(avctx); + init_vlcs(a); + ff_init_scantable(a->dsp.idct_permutation, &a->scantable, scantab); + avctx->pix_fmt= PIX_FMT_YUV420P; + + a->inv_qscale= ((uint8_t*)avctx->extradata)[0]; + if(a->inv_qscale == 0){ + av_log(avctx, AV_LOG_ERROR, "illegal qscale 0\n"); + if(avctx->codec_id == CODEC_ID_ASV1) + a->inv_qscale= 6; + else + a->inv_qscale= 10; + } + + for(i=0; i<64; i++){ + int index= scantab[i]; + + a->intra_matrix[i]= 64*scale*ff_mpeg1_default_intra_matrix[index] / a->inv_qscale; + } + + p->qstride= a->mb_width; + p->qscale_table= av_malloc( p->qstride * a->mb_height); + p->quality= (32*scale + a->inv_qscale/2)/a->inv_qscale; + memset(p->qscale_table, p->quality, p->qstride*a->mb_height); + + return 0; +} + +static int encode_init(AVCodecContext *avctx){ + ASV1Context * const a = avctx->priv_data; + int i; + const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 
1 : 2; + + common_init(avctx); + + if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE; + + a->inv_qscale= (32*scale*FF_QUALITY_SCALE + avctx->global_quality/2) / avctx->global_quality; + + avctx->extradata= av_mallocz(8); + avctx->extradata_size=8; + ((uint32_t*)avctx->extradata)[0]= le2me_32(a->inv_qscale); + ((uint32_t*)avctx->extradata)[1]= le2me_32(ff_get_fourcc("ASUS")); + + for(i=0; i<64; i++){ + int q= 32*scale*ff_mpeg1_default_intra_matrix[i]; + a->q_intra_matrix[i]= ((a->inv_qscale<<16) + q/2) / q; + } + + return 0; +} + +static int decode_end(AVCodecContext *avctx){ + ASV1Context * const a = avctx->priv_data; + + av_freep(&a->bitstream_buffer); + av_freep(&a->picture.qscale_table); + a->bitstream_buffer_size=0; + + return 0; +} + +AVCodec asv1_decoder = { + "asv1", + CODEC_TYPE_VIDEO, + CODEC_ID_ASV1, + sizeof(ASV1Context), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + +AVCodec asv2_decoder = { + "asv2", + CODEC_TYPE_VIDEO, + CODEC_ID_ASV2, + sizeof(ASV1Context), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + +#ifdef CONFIG_ENCODERS + +AVCodec asv1_encoder = { + "asv1", + CODEC_TYPE_VIDEO, + CODEC_ID_ASV1, + sizeof(ASV1Context), + encode_init, + encode_frame, + //encode_end, +}; + +AVCodec asv2_encoder = { + "asv2", + CODEC_TYPE_VIDEO, + CODEC_ID_ASV2, + sizeof(ASV1Context), + encode_init, + encode_frame, + //encode_end, +}; + +#endif //CONFIG_ENCODERS diff --git a/mpeg4/src/libavcodec/avcodec.h b/mpeg4/src/libavcodec/avcodec.h new file mode 100644 index 0000000000000000000000000000000000000000..0ba573e5ba6845809de0c5f5bcbc5dae47a84c3b --- /dev/null +++ b/mpeg4/src/libavcodec/avcodec.h @@ -0,0 +1,2567 @@ +#ifndef AVCODEC_H +#define AVCODEC_H + +/** + * @file avcodec.h + * external api header. 
+ */ + + +#ifdef __cplusplus +extern "C" { +#endif + +#include "avutil.h" +#include /* size_t */ + +//FIXME the following 2 really dont belong in here +#define FFMPEG_VERSION_INT 0x000409 +#define FFMPEG_VERSION "CVS" + +#define AV_STRINGIFY(s) AV_TOSTRING(s) +#define AV_TOSTRING(s) #s + +#define LIBAVCODEC_VERSION_INT ((51<<16)+(9<<8)+0) +#define LIBAVCODEC_VERSION 51.9.0 +#define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT + +#define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) + +#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) +#define AV_TIME_BASE 1000000 +#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE} + +enum CodecID { + CODEC_ID_NONE, + CODEC_ID_MPEG1VIDEO, + CODEC_ID_MPEG2VIDEO, /* prefered ID for MPEG Video 1 or 2 decoding */ + CODEC_ID_MPEG2VIDEO_XVMC, + CODEC_ID_H261, + CODEC_ID_H263, + CODEC_ID_RV10, + CODEC_ID_RV20, + CODEC_ID_MJPEG, + CODEC_ID_MJPEGB, + CODEC_ID_LJPEG, + CODEC_ID_SP5X, + CODEC_ID_JPEGLS, + CODEC_ID_MPEG4, + CODEC_ID_RAWVIDEO, + CODEC_ID_MSMPEG4V1, + CODEC_ID_MSMPEG4V2, + CODEC_ID_MSMPEG4V3, + CODEC_ID_WMV1, + CODEC_ID_WMV2, + CODEC_ID_H263P, + CODEC_ID_H263I, + CODEC_ID_FLV1, + CODEC_ID_SVQ1, + CODEC_ID_SVQ3, + CODEC_ID_DVVIDEO, + CODEC_ID_HUFFYUV, + CODEC_ID_CYUV, + CODEC_ID_H264, + CODEC_ID_INDEO3, + CODEC_ID_VP3, + CODEC_ID_THEORA, + CODEC_ID_ASV1, + CODEC_ID_ASV2, + CODEC_ID_FFV1, + CODEC_ID_4XM, + CODEC_ID_VCR1, + CODEC_ID_CLJR, + CODEC_ID_MDEC, + CODEC_ID_ROQ, + CODEC_ID_INTERPLAY_VIDEO, + CODEC_ID_XAN_WC3, + CODEC_ID_XAN_WC4, + CODEC_ID_RPZA, + CODEC_ID_CINEPAK, + CODEC_ID_WS_VQA, + CODEC_ID_MSRLE, + CODEC_ID_MSVIDEO1, + CODEC_ID_IDCIN, + CODEC_ID_8BPS, + CODEC_ID_SMC, + CODEC_ID_FLIC, + CODEC_ID_TRUEMOTION1, + CODEC_ID_VMDVIDEO, + CODEC_ID_MSZH, + CODEC_ID_ZLIB, + CODEC_ID_QTRLE, + CODEC_ID_SNOW, + CODEC_ID_TSCC, + CODEC_ID_ULTI, + CODEC_ID_QDRAW, + CODEC_ID_VIXL, + CODEC_ID_QPEG, + CODEC_ID_XVID, + CODEC_ID_PNG, + CODEC_ID_PPM, + CODEC_ID_PBM, + CODEC_ID_PGM, + CODEC_ID_PGMYUV, + CODEC_ID_PAM, + 
CODEC_ID_FFVHUFF, + CODEC_ID_RV30, + CODEC_ID_RV40, + CODEC_ID_VC9, + CODEC_ID_WMV3, + CODEC_ID_LOCO, + CODEC_ID_WNV1, + CODEC_ID_AASC, + CODEC_ID_INDEO2, + CODEC_ID_FRAPS, + CODEC_ID_TRUEMOTION2, + CODEC_ID_BMP, + CODEC_ID_CSCD, + CODEC_ID_MMVIDEO, + CODEC_ID_ZMBV, + CODEC_ID_AVS, + CODEC_ID_SMACKVIDEO, + CODEC_ID_NUV, + CODEC_ID_KMVC, + + /* various pcm "codecs" */ + CODEC_ID_PCM_S16LE= 0x10000, + CODEC_ID_PCM_S16BE, + CODEC_ID_PCM_U16LE, + CODEC_ID_PCM_U16BE, + CODEC_ID_PCM_S8, + CODEC_ID_PCM_U8, + CODEC_ID_PCM_MULAW, + CODEC_ID_PCM_ALAW, + CODEC_ID_PCM_S32LE, + CODEC_ID_PCM_S32BE, + CODEC_ID_PCM_U32LE, + CODEC_ID_PCM_U32BE, + CODEC_ID_PCM_S24LE, + CODEC_ID_PCM_S24BE, + CODEC_ID_PCM_U24LE, + CODEC_ID_PCM_U24BE, + CODEC_ID_PCM_S24DAUD, + + /* various adpcm codecs */ + CODEC_ID_ADPCM_IMA_QT= 0x11000, + CODEC_ID_ADPCM_IMA_WAV, + CODEC_ID_ADPCM_IMA_DK3, + CODEC_ID_ADPCM_IMA_DK4, + CODEC_ID_ADPCM_IMA_WS, + CODEC_ID_ADPCM_IMA_SMJPEG, + CODEC_ID_ADPCM_MS, + CODEC_ID_ADPCM_4XM, + CODEC_ID_ADPCM_XA, + CODEC_ID_ADPCM_ADX, + CODEC_ID_ADPCM_EA, + CODEC_ID_ADPCM_G726, + CODEC_ID_ADPCM_CT, + CODEC_ID_ADPCM_SWF, + CODEC_ID_ADPCM_YAMAHA, + CODEC_ID_ADPCM_SBPRO_4, + CODEC_ID_ADPCM_SBPRO_3, + CODEC_ID_ADPCM_SBPRO_2, + + /* AMR */ + CODEC_ID_AMR_NB= 0x12000, + CODEC_ID_AMR_WB, + + /* RealAudio codecs*/ + CODEC_ID_RA_144= 0x13000, + CODEC_ID_RA_288, + + /* various DPCM codecs */ + CODEC_ID_ROQ_DPCM= 0x14000, + CODEC_ID_INTERPLAY_DPCM, + CODEC_ID_XAN_DPCM, + CODEC_ID_SOL_DPCM, + + CODEC_ID_MP2= 0x15000, + CODEC_ID_MP3, /* prefered ID for MPEG Audio layer 1, 2 or3 decoding */ + CODEC_ID_AAC, + CODEC_ID_MPEG4AAC, + CODEC_ID_AC3, + CODEC_ID_DTS, + CODEC_ID_VORBIS, + CODEC_ID_DVAUDIO, + CODEC_ID_WMAV1, + CODEC_ID_WMAV2, + CODEC_ID_MACE3, + CODEC_ID_MACE6, + CODEC_ID_VMDAUDIO, + CODEC_ID_SONIC, + CODEC_ID_SONIC_LS, + CODEC_ID_FLAC, + CODEC_ID_MP3ADU, + CODEC_ID_MP3ON4, + CODEC_ID_SHORTEN, + CODEC_ID_ALAC, + CODEC_ID_WESTWOOD_SND1, + CODEC_ID_GSM, + CODEC_ID_QDM2, + CODEC_ID_COOK, + 
CODEC_ID_TRUESPEECH, + CODEC_ID_TTA, + CODEC_ID_SMACKAUDIO, + + CODEC_ID_OGGTHEORA= 0x16000, + + /* subtitle codecs */ + CODEC_ID_DVD_SUBTITLE= 0x17000, + CODEC_ID_DVB_SUBTITLE, + + CODEC_ID_MPEG2TS= 0x20000, /* _FAKE_ codec to indicate a raw MPEG2 transport + stream (only used by libavformat) */ +}; + +/* CODEC_ID_MP3LAME is absolete */ +#define CODEC_ID_MP3LAME CODEC_ID_MP3 + +enum CodecType { + CODEC_TYPE_UNKNOWN = -1, + CODEC_TYPE_VIDEO, + CODEC_TYPE_AUDIO, + CODEC_TYPE_DATA, + CODEC_TYPE_SUBTITLE, +}; + +/* currently unused, may be used if 24/32 bits samples ever supported */ +/* all in native endian */ +enum SampleFormat { + SAMPLE_FMT_NONE = -1, + SAMPLE_FMT_U8, ///< unsigned 8 bits + SAMPLE_FMT_S16, ///< signed 16 bits + SAMPLE_FMT_S24, ///< signed 24 bits + SAMPLE_FMT_S32, ///< signed 32 bits + SAMPLE_FMT_FLT, ///< float +}; + +/* in bytes */ +#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio + +/** + * Required number of additionally allocated bytes at the end of the input bitstream for decoding. + * this is mainly needed because some optimized bitstream readers read + * 32 or 64 bit at once and could read over the end
+ * Note, if the first 23 bits of the additional bytes are not 0 then damaged + * MPEG bitstreams could cause overread and segfault + */ +#define FF_INPUT_BUFFER_PADDING_SIZE 8 + +/** + * minimum encoding buffer size. + * used to avoid some checks during header writing + */ +#define FF_MIN_BUFFER_SIZE 16384 + +/* motion estimation type, EPZS by default */ +enum Motion_Est_ID { + ME_ZERO = 1, + ME_FULL, + ME_LOG, + ME_PHODS, + ME_EPZS, + ME_X1, + ME_HEX, + ME_UMH, + ME_ITER, +}; + +enum AVDiscard{ +//we leave some space between them for extensions (drop some keyframes for intra only or drop just some bidir frames) + AVDISCARD_NONE =-16, ///< discard nothing + AVDISCARD_DEFAULT= 0, ///< discard useless packets like 0 size packets in avi + AVDISCARD_NONREF = 8, ///< discard all non reference + AVDISCARD_BIDIR = 16, ///< discard all bidirectional frames + AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes + AVDISCARD_ALL = 48, ///< discard all +}; + +typedef struct RcOverride{ + int start_frame; + int end_frame; + int qscale; // if this is 0 then quality_factor will be used instead + float quality_factor; +} RcOverride; + +/* only for ME compatiblity with old apps */ +extern int motion_estimation_method; + +#define FF_MAX_B_FRAMES 16 + +/* encoding support + these flags can be passed in AVCodecContext.flags before initing + Note: not everything is supported yet. 
+*/ + +#define CODEC_FLAG_QSCALE 0x0002 ///< use fixed qscale +#define CODEC_FLAG_4MV 0x0004 ///< 4 MV per MB allowed / Advanced prediction for H263 +#define CODEC_FLAG_QPEL 0x0010 ///< use qpel MC +#define CODEC_FLAG_GMC 0x0020 ///< use GMC +#define CODEC_FLAG_MV0 0x0040 ///< always try a MB with MV=<0,0> +#define CODEC_FLAG_PART 0x0080 ///< use data partitioning +/* parent program gurantees that the input for b-frame containing streams is not written to + for at least s->max_b_frames+1 frames, if this is not set than the input will be copied */ +#define CODEC_FLAG_INPUT_PRESERVED 0x0100 +#define CODEC_FLAG_PASS1 0x0200 ///< use internal 2pass ratecontrol in first pass mode +#define CODEC_FLAG_PASS2 0x0400 ///< use internal 2pass ratecontrol in second pass mode +#define CODEC_FLAG_EXTERN_HUFF 0x1000 ///< use external huffman table (for mjpeg) +#define CODEC_FLAG_GRAY 0x2000 ///< only decode/encode grayscale +#define CODEC_FLAG_EMU_EDGE 0x4000///< don't draw edges +#define CODEC_FLAG_PSNR 0x8000 ///< error[?] 
variables will be set during encoding +#define CODEC_FLAG_TRUNCATED 0x00010000 /** input bitstream might be truncated at a random location instead + of only at frame boundaries */ +#define CODEC_FLAG_NORMALIZE_AQP 0x00020000 ///< normalize adaptive quantization +#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< use interlaced dct +#define CODEC_FLAG_LOW_DELAY 0x00080000 ///< force low delay +#define CODEC_FLAG_ALT_SCAN 0x00100000 ///< use alternate scan +#define CODEC_FLAG_TRELLIS_QUANT 0x00200000 ///< use trellis quantization +#define CODEC_FLAG_GLOBAL_HEADER 0x00400000 ///< place global headers in extradata instead of every keyframe +#define CODEC_FLAG_BITEXACT 0x00800000 ///< use only bitexact stuff (except (i)dct) +/* Fx : Flag for h263+ extra options */ +#define CODEC_FLAG_H263P_AIC 0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction (remove this) +#define CODEC_FLAG_AC_PRED 0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction +#define CODEC_FLAG_H263P_UMV 0x02000000 ///< Unlimited motion vector +#define CODEC_FLAG_CBP_RD 0x04000000 ///< use rate distortion optimization for cbp +#define CODEC_FLAG_QP_RD 0x08000000 ///< use rate distortion optimization for qp selectioon +#define CODEC_FLAG_H263P_AIV 0x00000008 ///< H263 Alternative inter vlc +#define CODEC_FLAG_OBMC 0x00000001 ///< OBMC +#define CODEC_FLAG_LOOP_FILTER 0x00000800 ///< loop filter +#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 +#define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation +#define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< will reserve space for SVCD scan offset user data +#define CODEC_FLAG_CLOSED_GOP 0x80000000 +#define CODEC_FLAG2_FAST 0x00000001 ///< allow non spec compliant speedup tricks +#define CODEC_FLAG2_STRICT_GOP 0x00000002 ///< strictly enforce GOP size +#define CODEC_FLAG2_NO_OUTPUT 0x00000004 ///< skip bitstream encoding +#define CODEC_FLAG2_LOCAL_HEADER 0x00000008 ///< place global headers at every keyframe instead of 
in extradata +#define CODEC_FLAG2_BPYRAMID 0x00000010 ///< H.264 allow b-frames to be used as references +#define CODEC_FLAG2_WPRED 0x00000020 ///< H.264 weighted biprediction for b-frames +#define CODEC_FLAG2_MIXED_REFS 0x00000040 ///< H.264 multiple references per partition +#define CODEC_FLAG2_8X8DCT 0x00000080 ///< H.264 high profile 8x8 transform +#define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip +#define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters +#define CODEC_FLAG2_BRDO 0x00000400 ///< b-frame rate-distortion optimization + +/* Unsupported options : + * Syntax Arithmetic coding (SAC) + * Reference Picture Selection + * Independant Segment Decoding */ +/* /Fx */ +/* codec capabilities */ + +#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< decoder can use draw_horiz_band callback +/** + * Codec uses get_buffer() for allocating buffers. + * direct rendering method 1 + */ +#define CODEC_CAP_DR1 0x0002 +/* if 'parse_only' field is true, then avcodec_parse_frame() can be + used */ +#define CODEC_CAP_PARSE_ONLY 0x0004 +#define CODEC_CAP_TRUNCATED 0x0008 +/* codec can export data for HW decoding (XvMC) */ +#define CODEC_CAP_HWACCEL 0x0010 +/** + * codec has a non zero delay and needs to be feeded with NULL at the end to get the delayed data. 
+ * if this is not set, the codec is guranteed to never be feeded with NULL data + */ +#define CODEC_CAP_DELAY 0x0020 + +//the following defines may change, don't expect compatibility if you use them +#define MB_TYPE_INTRA4x4 0x0001 +#define MB_TYPE_INTRA16x16 0x0002 //FIXME h264 specific +#define MB_TYPE_INTRA_PCM 0x0004 //FIXME h264 specific +#define MB_TYPE_16x16 0x0008 +#define MB_TYPE_16x8 0x0010 +#define MB_TYPE_8x16 0x0020 +#define MB_TYPE_8x8 0x0040 +#define MB_TYPE_INTERLACED 0x0080 +#define MB_TYPE_DIRECT2 0x0100 //FIXME +#define MB_TYPE_ACPRED 0x0200 +#define MB_TYPE_GMC 0x0400 +#define MB_TYPE_SKIP 0x0800 +#define MB_TYPE_P0L0 0x1000 +#define MB_TYPE_P1L0 0x2000 +#define MB_TYPE_P0L1 0x4000 +#define MB_TYPE_P1L1 0x8000 +#define MB_TYPE_L0 (MB_TYPE_P0L0 | MB_TYPE_P1L0) +#define MB_TYPE_L1 (MB_TYPE_P0L1 | MB_TYPE_P1L1) +#define MB_TYPE_L0L1 (MB_TYPE_L0 | MB_TYPE_L1) +#define MB_TYPE_QUANT 0x00010000 +#define MB_TYPE_CBP 0x00020000 +//Note bits 24-31 are reserved for codec specific use (h264 ref0, mpeg1 0mv, ...) + +/** + * Pan Scan area. + * this specifies the area which should be displayed. Note there may be multiple such areas for one frame + */ +typedef struct AVPanScan{ + /** + * id. + * - encoding: set by user. + * - decoding: set by lavc + */ + int id; + + /** + * width and height in 1/16 pel + * - encoding: set by user. + * - decoding: set by lavc + */ + int width; + int height; + + /** + * position of the top left corner in 1/16 pel for up to 3 fields/frames. + * - encoding: set by user. + * - decoding: set by lavc + */ + int16_t position[3][2]; +}AVPanScan; + +#define FF_COMMON_FRAME \ + /**\ + * pointer to the picture planes.\ + * this might be different from the first allocated byte\ + * - encoding: \ + * - decoding: \ + */\ + uint8_t *data[4];\ + int linesize[4];\ + /**\ + * pointer to the first allocated byte of the picture. 
can be used in get_buffer/release_buffer\ + * this isn't used by lavc unless the default get/release_buffer() is used\ + * - encoding: \ + * - decoding: \ + */\ + uint8_t *base[4];\ + /**\ + * 1 -> keyframe, 0-> not\ + * - encoding: set by lavc\ + * - decoding: set by lavc\ + */\ + int key_frame;\ +\ + /**\ + * picture type of the frame, see ?_TYPE below.\ + * - encoding: set by lavc for coded_picture (and set by user for input)\ + * - decoding: set by lavc\ + */\ + int pict_type;\ +\ + /**\ + * presentation timestamp in time_base units (time when frame should be shown to user)\ + * if AV_NOPTS_VALUE then frame_rate = 1/time_base will be assumed\ + * - encoding: MUST be set by user\ + * - decoding: set by lavc\ + */\ + int64_t pts;\ +\ + /**\ + * picture number in bitstream order.\ + * - encoding: set by\ + * - decoding: set by lavc\ + */\ + int coded_picture_number;\ + /**\ + * picture number in display order.\ + * - encoding: set by\ + * - decoding: set by lavc\ + */\ + int display_picture_number;\ +\ + /**\ + * quality (between 1 (good) and FF_LAMBDA_MAX (bad)) \ + * - encoding: set by lavc for coded_picture (and set by user for input)\ + * - decoding: set by lavc\ + */\ + int quality; \ +\ + /**\ + * buffer age (1->was last buffer and dint change, 2->..., ...).\ + * set to INT_MAX if the buffer has not been used yet \ + * - encoding: unused\ + * - decoding: MUST be set by get_buffer()\ + */\ + int age;\ +\ + /**\ + * is this picture used as reference\ + * - encoding: unused\ + * - decoding: set by lavc (before get_buffer() call))\ + */\ + int reference;\ +\ + /**\ + * QP table\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + int8_t *qscale_table;\ + /**\ + * QP store stride\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + int qstride;\ +\ + /**\ + * mbskip_table[mb]>=1 if MB didnt change\ + * stride= mb_width = (width+15)>>4\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + uint8_t *mbskip_table;\ +\ + /**\ + * Motion 
vector table.\ + * @code\ + * example:\ + * int mv_sample_log2= 4 - motion_subsample_log2;\ + * int mb_width= (width+15)>>4;\ + * int mv_stride= (mb_width << mv_sample_log2) + 1;\ + * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\ + * @endcode\ + * - encoding: set by user\ + * - decoding: set by lavc\ + */\ + int16_t (*motion_val[2])[2];\ +\ + /**\ + * Macroblock type table\ + * mb_type_base + mb_width + 2\ + * - encoding: set by user\ + * - decoding: set by lavc\ + */\ + uint32_t *mb_type;\ +\ + /**\ + * log2 of the size of the block which a single vector in motion_val represents: \ + * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + uint8_t motion_subsample_log2;\ +\ + /**\ + * for some private data of the user\ + * - encoding: unused\ + * - decoding: set by user\ + */\ + void *opaque;\ +\ + /**\ + * error\ + * - encoding: set by lavc if flags&CODEC_FLAG_PSNR\ + * - decoding: unused\ + */\ + uint64_t error[4];\ +\ + /**\ + * type of the buffer (to keep track of who has to dealloc data[*])\ + * - encoding: set by the one who allocs it\ + * - decoding: set by the one who allocs it\ + * Note: user allocated (direct rendering) & internal buffers can not coexist currently\ + */\ + int type;\ + \ + /**\ + * when decoding, this signal how much the picture must be delayed.\ + * extra_delay = repeat_pict / (2*fps)\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + int repeat_pict;\ + \ + /**\ + * \ + */\ + int qscale_type;\ + \ + /**\ + * The content of the picture is interlaced.\ + * - encoding: set by user\ + * - decoding: set by lavc (default 0)\ + */\ + int interlaced_frame;\ + \ + /**\ + * if the content is interlaced, is top field displayed first.\ + * - encoding: set by user\ + * - decoding: set by lavc\ + */\ + int top_field_first;\ + \ + /**\ + * Pan scan.\ + * - encoding: set by user\ + * - decoding: set by lavc\ + */\ + AVPanScan *pan_scan;\ + \ + /**\ + * tell user application that 
palette has changed from previous frame.\ + * - encoding: ??? (no palette-enabled encoder yet)\ + * - decoding: set by lavc (default 0)\ + */\ + int palette_has_changed;\ + \ + /**\ + * Codec suggestion on buffer type if != 0\ + * - encoding: unused\ + * - decoding: set by lavc (before get_buffer() call))\ + */\ + int buffer_hints;\ +\ + /**\ + * DCT coeffitients\ + * - encoding: unused\ + * - decoding: set by lavc\ + */\ + short *dct_coeff;\ +\ + /**\ + * Motion referece frame index\ + * - encoding: set by user\ + * - decoding: set by lavc\ + */\ + int8_t *ref_index[2]; + +#define FF_QSCALE_TYPE_MPEG1 0 +#define FF_QSCALE_TYPE_MPEG2 1 +#define FF_QSCALE_TYPE_H264 2 + +#define FF_BUFFER_TYPE_INTERNAL 1 +#define FF_BUFFER_TYPE_USER 2 ///< Direct rendering buffers (image is (de)allocated by user) +#define FF_BUFFER_TYPE_SHARED 4 ///< buffer from somewhere else, don't dealloc image (data/base), all other tables are not shared +#define FF_BUFFER_TYPE_COPY 8 ///< just a (modified) copy of some other buffer, don't dealloc anything + + +#define FF_I_TYPE 1 // Intra +#define FF_P_TYPE 2 // Predicted +#define FF_B_TYPE 3 // Bi-dir predicted +#define FF_S_TYPE 4 // S(GMC)-VOP MPEG4 +#define FF_SI_TYPE 5 +#define FF_SP_TYPE 6 + +#define FF_BUFFER_HINTS_VALID 0x01 // Buffer hints value is meaningful (if 0 ignore) +#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer +#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content +#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update) + +/** + * Audio Video Frame. + */ +typedef struct AVFrame { + FF_COMMON_FRAME +} AVFrame; + +#define DEFAULT_FRAME_RATE_BASE 1001000 + +/** + * Used by av_log + */ +typedef struct AVCLASS AVClass; +struct AVCLASS { + const char* class_name; + const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext + or AVFormatContext, which begin with an AVClass. 
+ Needed because av_log is in libavcodec and has no visibility + of AVIn/OutputFormat */ + struct AVOption *option; +}; + +/** + * main external api structure. + */ +typedef struct AVCodecContext { + /** + * Info on struct for av_log + * - set by avcodec_alloc_context + */ + AVClass *av_class; + /** + * the average bitrate. + * - encoding: set by user. unused for constant quantizer encoding + * - decoding: set by lavc. 0 or some bitrate if this info is available in the stream + */ + int bit_rate; + + /** + * number of bits the bitstream is allowed to diverge from the reference. + * the reference can be CBR (for CBR pass1) or VBR (for pass2) + * - encoding: set by user. unused for constant quantizer encoding + * - decoding: unused + */ + int bit_rate_tolerance; + + /** + * CODEC_FLAG_*. + * - encoding: set by user. + * - decoding: set by user. + */ + int flags; + + /** + * some codecs need additional format info. It is stored here + * - encoding: set by user. + * - decoding: set by lavc. (FIXME is this ok?) + */ + int sub_id; + + /** + * motion estimation algorithm used for video coding. + * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex), + * 8 (umh), 9 (iter) [7, 8 are x264 specific, 9 is snow specific] + * - encoding: MUST be set by user. + * - decoding: unused + */ + int me_method; + + /** + * some codecs need / can use extra-data like huffman tables. + * mjpeg: huffman tables + * rv10: additional flags + * mpeg4: global headers (they can be in the bitstream or here) + * the allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger + * than extradata_size to avoid problems if it is read with the bitstream reader + * the bytewise contents of extradata must not depend on the architecture or cpu endianness + * - encoding: set/allocated/freed by lavc. + * - decoding: set/allocated/freed by user.
+ */ + void *extradata; + int extradata_size; + + /** + * this is the fundamental unit of time (in seconds) in terms + * of which frame timestamps are represented. for fixed-fps content, + * timebase should be 1/framerate and timestamp increments should be + * identically 1. + * - encoding: MUST be set by user + * - decoding: set by lavc. + */ + AVRational time_base; + + /* video only */ + /** + * picture width / height. + * - encoding: MUST be set by user. + * - decoding: set by lavc. + * Note, for compatibility it is possible to set this instead of + * coded_width/height before decoding + */ + int width, height; + +#define FF_ASPECT_EXTENDED 15 + + /** + * the number of pictures in a group of pictures, or 0 for intra_only. + * - encoding: set by user. + * - decoding: unused + */ + int gop_size; + + /** + * pixel format, see PIX_FMT_xxx. + * - encoding: set by user. + * - decoding: set by lavc. + */ + enum PixelFormat pix_fmt; + + /** + * Frame rate emulation. If not zero lower layer (i.e. format handler) + * has to read frames at native frame rate. + * - encoding: set by user. + * - decoding: unused. + */ + int rate_emu; + + /** + * if non NULL, 'draw_horiz_band' is called by the libavcodec + * decoder to draw a horizontal band. It improves cache usage. Not + * all codecs can do that. You must check the codec capabilities + * before + * - encoding: unused + * - decoding: set by user. + * @param height the height of the slice + * @param y the y position of the slice + * @param type 1->top field, 2->bottom field, 3->frame + * @param offset offset into the AVFrame.data from which the slice should be read + */ + void (*draw_horiz_band)(struct AVCodecContext *s, + const AVFrame *src, int offset[4], + int y, int type, int height); + + /* audio only */ + int sample_rate; ///< samples per sec + int channels; + + /** + * audio sample format. + * - encoding: set by user. + * - decoding: set by lavc.
+ */ + enum SampleFormat sample_fmt; ///< sample format, currently unused + + /* the following data should not be initialized */ + /** + * samples per packet. initialized when calling 'init' + */ + int frame_size; + int frame_number; ///< audio or video frame number + int real_pict_num; ///< returns the real picture number of previous encoded frame + + /** + * number of frames the decoded output will be delayed relative to + * the encoded input. + * - encoding: set by lavc. + * - decoding: unused + */ + int delay; + + /* - encoding parameters */ + float qcompress; ///< amount of qscale change between easy & hard scenes (0.0-1.0) + float qblur; ///< amount of qscale smoothing over time (0.0-1.0) + + /** + * minimum quantizer. + * - encoding: set by user. + * - decoding: unused + */ + int qmin; + + /** + * maximum quantizer. + * - encoding: set by user. + * - decoding: unused + */ + int qmax; + + /** + * maximum quantizer difference between frames. + * - encoding: set by user. + * - decoding: unused + */ + int max_qdiff; + + /** + * maximum number of b frames between non b frames. + * note: the output will be delayed by max_b_frames+1 relative to the input + * - encoding: set by user. + * - decoding: unused + */ + int max_b_frames; + + /** + * qscale factor between ip and b frames. + * - encoding: set by user. + * - decoding: unused + */ + float b_quant_factor; + + /** obsolete FIXME remove */ + int rc_strategy; +#define FF_RC_STRATEGY_XVID 1 + + int b_frame_strategy; + + /** + * hurry up amount. + * deprecated in favor of skip_idct and skip_frame + * - encoding: unused + * - decoding: set by user.
1-> skip b frames, 2-> skip idct/dequant too, 5-> skip everything except header + */ + int hurry_up; + + struct AVCodec *codec; + + void *priv_data; + + /* unused, FIXME remove*/ + int rtp_mode; + + int rtp_payload_size; /* The size of the RTP payload: the coder will */ + /* do its best to deliver a chunk with size */ + /* below rtp_payload_size, the chunk will start */ + /* with a start code on some codecs like H.263 */ + /* This doesn't take account of any particular */ + /* headers inside the transmitted RTP payload */ + + + /* The RTP callback: This function is called */ + /* every time the encoder has a packet to send */ + /* Depends on the encoder if the data starts */ + /* with a Start Code (it should) H.263 does. */ + /* mb_nb contains the number of macroblocks */ + /* encoded in the RTP payload */ + void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb); + + /* statistics, used for 2-pass encoding */ + int mv_bits; + int header_bits; + int i_tex_bits; + int p_tex_bits; + int i_count; + int p_count; + int skip_count; + int misc_bits; + + /** + * number of bits used for the previously encoded frame. + * - encoding: set by lavc + * - decoding: unused + */ + int frame_bits; + + /** + * private data of the user, can be used to carry app specific stuff. + * - encoding: set by user + * - decoding: set by user + */ + void *opaque; + + char codec_name[32]; + enum CodecType codec_type; /* see CODEC_TYPE_xxx */ + enum CodecID codec_id; /* see CODEC_ID_xxx */ + + /** + * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A'). + * this is used to workaround some encoder bugs + * - encoding: set by user, if not then the default based on codec_id will be used + * - decoding: set by user, will be converted to upper case by lavc during init + */ + unsigned int codec_tag; + + /** + * workaround bugs in encoders which sometimes cannot be detected automatically.
+ * - encoding: set by user + * - decoding: set by user + */ + int workaround_bugs; +#define FF_BUG_AUTODETECT 1 ///< autodetection +#define FF_BUG_OLD_MSMPEG4 2 +#define FF_BUG_XVID_ILACE 4 +#define FF_BUG_UMP4 8 +#define FF_BUG_NO_PADDING 16 +#define FF_BUG_AMV 32 +#define FF_BUG_AC_VLC 0 ///< will be removed, libavcodec can now handle these non compliant files by default +#define FF_BUG_QPEL_CHROMA 64 +#define FF_BUG_STD_QPEL 128 +#define FF_BUG_QPEL_CHROMA2 256 +#define FF_BUG_DIRECT_BLOCKSIZE 512 +#define FF_BUG_EDGE 1024 +#define FF_BUG_HPEL_CHROMA 2048 +#define FF_BUG_DC_CLIP 4096 +#define FF_BUG_MS 8192 ///< workaround various bugs in microsofts broken decoders +//#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100% + + /** + * luma single coeff elimination threshold. + * - encoding: set by user + * - decoding: unused + */ + int luma_elim_threshold; + + /** + * chroma single coeff elimination threshold. + * - encoding: set by user + * - decoding: unused + */ + int chroma_elim_threshold; + + /** + * strictly follow the std (MPEG4, ...). + * - encoding: set by user + * - decoding: unused + */ + int strict_std_compliance; +#define FF_COMPLIANCE_VERY_STRICT 2 ///< strictly conform to an older more strict version of the spec or reference software +#define FF_COMPLIANCE_STRICT 1 ///< strictly conform to all the things in the spec no matter what consequences +#define FF_COMPLIANCE_NORMAL 0 +#define FF_COMPLIANCE_INOFFICIAL -1 ///< allow unofficial extensions +#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< allow non standardized experimental things + + /** + * qscale offset between ip and b frames. + * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset) + * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset) + * - encoding: set by user.
+ * - decoding: unused + */ + float b_quant_offset; + + /** + * error resilience higher values will detect more errors but may misdetect + * some more or less valid parts as errors. + * - encoding: unused + * - decoding: set by user + */ + int error_resilience; +#define FF_ER_CAREFUL 1 +#define FF_ER_COMPLIANT 2 +#define FF_ER_AGGRESSIVE 3 +#define FF_ER_VERY_AGGRESSIVE 4 + + /** + * called at the beginning of each frame to get a buffer for it. + * if pic.reference is set then the frame will be read later by lavc + * avcodec_align_dimensions() should be used to find the required width and + * height, as they normally need to be rounded up to the next multiple of 16 + * - encoding: unused + * - decoding: set by lavc, user can override + */ + int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic); + + /** + * called to release buffers which were allocated with get_buffer. + * a released buffer can be reused in get_buffer() + * pic.data[*] must be set to NULL + * - encoding: unused + * - decoding: set by lavc, user can override + */ + void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic); + + /** + * if 1 the stream has a 1 frame delay during decoding. + * - encoding: set by lavc + * - decoding: set by lavc + */ + int has_b_frames; + + /** + * number of bytes per packet if constant and known or 0 + * used by some WAV based audio codecs + */ + int block_align; + + int parse_only; /* - decoding only: if true, only parsing is done + (function avcodec_parse_frame()). The frame + data is returned. Only MPEG codecs support this now. */ + + /** + * 0-> h263 quant 1-> mpeg quant. + * - encoding: set by user. + * - decoding: unused + */ + int mpeg_quant; + + /** + * pass1 encoding statistics output buffer. + * - encoding: set by lavc + * - decoding: unused + */ + char *stats_out; + + /** + * pass2 encoding statistics input buffer.
+ * concatenated stuff from stats_out of pass1 should be placed here + * - encoding: allocated/set/freed by user + * - decoding: unused + */ + char *stats_in; + + /** + * ratecontrol qmin qmax limiting method. + * 0-> clipping, 1-> use a nice continuous function to limit qscale within qmin/qmax + * - encoding: set by user. + * - decoding: unused + */ + float rc_qsquish; + + float rc_qmod_amp; + int rc_qmod_freq; + + /** + * ratecontrol override, see RcOverride. + * - encoding: allocated/set/freed by user. + * - decoding: unused + */ + RcOverride *rc_override; + int rc_override_count; + + /** + * rate control equation. + * - encoding: set by user + * - decoding: unused + */ + char *rc_eq; + + /** + * maximum bitrate. + * - encoding: set by user. + * - decoding: unused + */ + int rc_max_rate; + + /** + * minimum bitrate. + * - encoding: set by user. + * - decoding: unused + */ + int rc_min_rate; + + /** + * decoder bitstream buffer size. + * - encoding: set by user. + * - decoding: unused + */ + int rc_buffer_size; + float rc_buffer_aggressivity; + + /** + * qscale factor between p and i frames. + * if > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset) + * if < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset) + * - encoding: set by user. + * - decoding: unused + */ + float i_quant_factor; + + /** + * qscale offset between p and i frames. + * - encoding: set by user. + * - decoding: unused + */ + float i_quant_offset; + + /** + * initial complexity for pass1 ratecontrol. + * - encoding: set by user. + * - decoding: unused + */ + float rc_initial_cplx; + + /** + * dct algorithm, see FF_DCT_* below. + * - encoding: set by user + * - decoding: unused + */ + int dct_algo; +#define FF_DCT_AUTO 0 +#define FF_DCT_FASTINT 1 +#define FF_DCT_INT 2 +#define FF_DCT_MMX 3 +#define FF_DCT_MLIB 4 +#define FF_DCT_ALTIVEC 5 +#define FF_DCT_FAAN 6 + + /** + * luminance masking (0-> disabled).
+ * - encoding: set by user + * - decoding: unused + */ + float lumi_masking; + + /** + * temporary complexity masking (0-> disabled). + * - encoding: set by user + * - decoding: unused + */ + float temporal_cplx_masking; + + /** + * spatial complexity masking (0-> disabled). + * - encoding: set by user + * - decoding: unused + */ + float spatial_cplx_masking; + + /** + * p block masking (0-> disabled). + * - encoding: set by user + * - decoding: unused + */ + float p_masking; + + /** + * darkness masking (0-> disabled). + * - encoding: set by user + * - decoding: unused + */ + float dark_masking; + + + /* for binary compatibility */ + int unused; + + /** + * idct algorithm, see FF_IDCT_* below. + * - encoding: set by user + * - decoding: set by user + */ + int idct_algo; +#define FF_IDCT_AUTO 0 +#define FF_IDCT_INT 1 +#define FF_IDCT_SIMPLE 2 +#define FF_IDCT_SIMPLEMMX 3 +#define FF_IDCT_LIBMPEG2MMX 4 +#define FF_IDCT_PS2 5 +#define FF_IDCT_MLIB 6 +#define FF_IDCT_ARM 7 +#define FF_IDCT_ALTIVEC 8 +#define FF_IDCT_SH4 9 +#define FF_IDCT_SIMPLEARM 10 +#define FF_IDCT_H264 11 +#define FF_IDCT_VP3 12 +#define FF_IDCT_IPP 13 +#define FF_IDCT_XVIDMMX 14 + + /** + * slice count. + * - encoding: set by lavc + * - decoding: set by user (or 0) + */ + int slice_count; + /** + * slice offsets in the frame in bytes. + * - encoding: set/allocated by lavc + * - decoding: set/allocated by user (or NULL) + */ + int *slice_offset; + + /** + * error concealment flags. + * - encoding: unused + * - decoding: set by user + */ + int error_concealment; +#define FF_EC_GUESS_MVS 1 +#define FF_EC_DEBLOCK 2 + + /** + * dsp_mask could be used to disable unwanted CPU features + * CPU features (i.e. MMX, SSE. ...)
+ * + * with FORCE flag you may instead enable given CPU features + * (Dangerous: usable in case of misdetection, improper usage however will + * result into program crash) + */ + unsigned dsp_mask; +#define FF_MM_FORCE 0x80000000 /* force usage of selected flags (OR) */ + /* lower 16 bits - CPU features */ +#ifdef HAVE_MMX +#define FF_MM_MMX 0x0001 /* standard MMX */ +#define FF_MM_3DNOW 0x0004 /* AMD 3DNOW */ +#define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ +#define FF_MM_SSE 0x0008 /* SSE functions */ +#define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ +#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ +#endif /* HAVE_MMX */ +#ifdef HAVE_IWMMXT +#define FF_MM_IWMMXT 0x0100 /* XScale IWMMXT */ +#endif /* HAVE_IWMMXT */ + + /** + * bits per sample/pixel from the demuxer (needed for huffyuv). + * - encoding: set by lavc + * - decoding: set by user + */ + int bits_per_sample; + + /** + * prediction method (needed for huffyuv). + * - encoding: set by user + * - decoding: unused + */ + int prediction_method; +#define FF_PRED_LEFT 0 +#define FF_PRED_PLANE 1 +#define FF_PRED_MEDIAN 2 + + /** + * sample aspect ratio (0 if unknown). + * numerator and denominator must be relatively prime and smaller than 256 for some video standards + * - encoding: set by user. + * - decoding: set by lavc. + */ + AVRational sample_aspect_ratio; + + /** + * the picture in the bitstream. + * - encoding: set by lavc + * - decoding: set by lavc + */ + AVFrame *coded_frame; + + /** + * debug. + * - encoding: set by user. + * - decoding: set by user.
+ */ + int debug; +#define FF_DEBUG_PICT_INFO 1 +#define FF_DEBUG_RC 2 +#define FF_DEBUG_BITSTREAM 4 +#define FF_DEBUG_MB_TYPE 8 +#define FF_DEBUG_QP 16 +#define FF_DEBUG_MV 32 +#define FF_DEBUG_DCT_COEFF 0x00000040 +#define FF_DEBUG_SKIP 0x00000080 +#define FF_DEBUG_STARTCODE 0x00000100 +#define FF_DEBUG_PTS 0x00000200 +#define FF_DEBUG_ER 0x00000400 +#define FF_DEBUG_MMCO 0x00000800 +#define FF_DEBUG_BUGS 0x00001000 +#define FF_DEBUG_VIS_QP 0x00002000 +#define FF_DEBUG_VIS_MB_TYPE 0x00004000 + + /** + * debug motion vector visualization, see FF_DEBUG_VIS_MV_* below. + * - encoding: set by user. + * - decoding: set by user. + */ + int debug_mv; +#define FF_DEBUG_VIS_MV_P_FOR 0x00000001 //visualize forward predicted MVs of P frames +#define FF_DEBUG_VIS_MV_B_FOR 0x00000002 //visualize forward predicted MVs of B frames +#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames + + /** + * error. + * - encoding: set by lavc if flags&CODEC_FLAG_PSNR + * - decoding: unused + */ + uint64_t error[4]; + + /** + * minimum MB quantizer. + * - encoding: unused + * - decoding: unused + */ + int mb_qmin; + + /** + * maximum MB quantizer. + * - encoding: unused + * - decoding: unused + */ + int mb_qmax; + + /** + * motion estimation compare function. + * - encoding: set by user. + * - decoding: unused + */ + int me_cmp; + /** + * subpixel motion estimation compare function. + * - encoding: set by user. + * - decoding: unused + */ + int me_sub_cmp; + /** + * macroblock compare function (not supported yet). + * - encoding: set by user. + * - decoding: unused + */ + int mb_cmp; + /** + * interlaced dct compare function + * - encoding: set by user.
+ * - decoding: unused + */ + int ildct_cmp; +#define FF_CMP_SAD 0 +#define FF_CMP_SSE 1 +#define FF_CMP_SATD 2 +#define FF_CMP_DCT 3 +#define FF_CMP_PSNR 4 +#define FF_CMP_BIT 5 +#define FF_CMP_RD 6 +#define FF_CMP_ZERO 7 +#define FF_CMP_VSAD 8 +#define FF_CMP_VSSE 9 +#define FF_CMP_NSSE 10 +#define FF_CMP_W53 11 +#define FF_CMP_W97 12 +#define FF_CMP_DCTMAX 13 +#define FF_CMP_DCT264 14 +#define FF_CMP_CHROMA 256 + + /** + * ME diamond size & shape. + * - encoding: set by user. + * - decoding: unused + */ + int dia_size; + + /** + * amount of previous MV predictors (2a+1 x 2a+1 square). + * - encoding: set by user. + * - decoding: unused + */ + int last_predictor_count; + + /** + * pre pass for motion estimation. + * - encoding: set by user. + * - decoding: unused + */ + int pre_me; + + /** + * motion estimation pre pass compare function. + * - encoding: set by user. + * - decoding: unused + */ + int me_pre_cmp; + + /** + * ME pre pass diamond size & shape. + * - encoding: set by user. + * - decoding: unused + */ + int pre_dia_size; + + /** + * subpel ME quality. + * - encoding: set by user. + * - decoding: unused + */ + int me_subpel_quality; + + /** + * callback to negotiate the pixelFormat. + * @param fmt is the list of formats which are supported by the codec, + * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality + * the first is always the native one + * @return the chosen format + * - encoding: unused + * - decoding: set by user, if not set then the native format will always be chosen + */ + enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt); + + /** + * DTG active format information (additional aspect ratio + * information only used in DVB MPEG2 transport streams). 0 if + * not set. + * + * - encoding: unused.
+ * - decoding: set by decoder + */ + int dtg_active_format; +#define FF_DTG_AFD_SAME 8 +#define FF_DTG_AFD_4_3 9 +#define FF_DTG_AFD_16_9 10 +#define FF_DTG_AFD_14_9 11 +#define FF_DTG_AFD_4_3_SP_14_9 13 +#define FF_DTG_AFD_16_9_SP_14_9 14 +#define FF_DTG_AFD_SP_4_3 15 + + /** + * Maximum motion estimation search range in subpel units. + * if 0 then no limit + * + * - encoding: set by user. + * - decoding: unused. + */ + int me_range; + + /** + * intra quantizer bias. + * - encoding: set by user. + * - decoding: unused + */ + int intra_quant_bias; +#define FF_DEFAULT_QUANT_BIAS 999999 + + /** + * inter quantizer bias. + * - encoding: set by user. + * - decoding: unused + */ + int inter_quant_bias; + + /** + * color table ID. + * - encoding: unused. + * - decoding: which clrtable should be used for 8bit RGB images + * table have to be stored somewhere FIXME + */ + int color_table_id; + + /** + * internal_buffer count. + * Don't touch, used by lavc default_get_buffer() + */ + int internal_buffer_count; + + /** + * internal_buffers. + * Don't touch, used by lavc default_get_buffer() + */ + void *internal_buffer; + + /* NOTE(review): the text from FF_LAMBDA_SCALE up to the start of the stream_codec_tag comment was lost in extraction and has been restored from upstream libavcodec of the same era; the field order determines struct layout, so confirm it against the vendored header. */ +#define FF_LAMBDA_SHIFT 7 +#define FF_LAMBDA_SCALE (1 << FF_LAMBDA_SHIFT) +#define FF_QP2LAMBDA 118 ///< factor to convert from H.263 QP to lambda +#define FF_LAMBDA_MAX (256*128-1) + +#define FF_QUALITY_SCALE FF_LAMBDA_SCALE //FIXME maybe remove + /** + * global quality for codecs which cannot change it per frame. + * this should be proportional to MPEG1/2/4 qscale. + * - encoding: set by user. + * - decoding: unused + */ + int global_quality; + +#define FF_CODER_TYPE_VLC 0 +#define FF_CODER_TYPE_AC 1 + /** + * coder type + * - encoding: set by user. + * - decoding: unused + */ + int coder_type; + + /** + * context model + * - encoding: set by user. + * - decoding: unused + */ + int context_model; + + /** + * slice flags + * - encoding: unused + * - decoding: set by user. + */ + int slice_flags; +#define SLICE_FLAG_CODED_ORDER 0x0001 ///< draw_horiz_band() is called in coded order instead of display +#define SLICE_FLAG_ALLOW_FIELD 0x0002 ///< allow draw_horiz_band() with field slices (MPEG2 field pics) +#define SLICE_FLAG_ALLOW_PLANE 0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1) + + /** + * XVideo Motion Acceleration + * - encoding: forbidden + * - decoding: set by decoder + */ + int xvmc_acceleration; + + /** + * macroblock decision mode + * - encoding: set by user. + * - decoding: unused + */ + int mb_decision; +#define FF_MB_DECISION_SIMPLE 0 ///< uses mb_cmp +#define FF_MB_DECISION_BITS 1 ///< chooses the one which needs the fewest bits +#define FF_MB_DECISION_RD 2 ///< rate distortion + + /** + * custom intra quantization matrix + * - encoding: set by user, can be NULL + * - decoding: set by lavc + */ + uint16_t *intra_matrix; + + /** + * custom inter quantization matrix + * - encoding: set by user, can be NULL + * - decoding: set by lavc + */ + uint16_t *inter_matrix; + + /** + * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A'). + * this is used to workaround some encoder bugs + * - encoding: unused + * - decoding: set by user, will be converted to upper case by lavc during init + */ + unsigned int stream_codec_tag; + + /** + * scene change detection threshold. + * 0 is default, larger means fewer detected scene changes + * - encoding: set by user. + * - decoding: unused + */ + int scenechange_threshold; + + /** + * minimum lagrange multiplier + * - encoding: set by user. + * - decoding: unused + */ + int lmin; + + /** + * maximum lagrange multiplier + * - encoding: set by user. + * - decoding: unused + */ + int lmax; + + /** + * Palette control structure + * - encoding: ??? (no palette-enabled encoder yet) + * - decoding: set by user.
+ */ + struct AVPaletteControl *palctrl; + + /** + * noise reduction strength + * - encoding: set by user. + * - decoding: unused + */ + int noise_reduction; + + /** + * called at the beginning of a frame to get cr buffer for it. + * buffer type (size, hints) must be the same. lavc won't check it. + * lavc will pass previous buffer in pic, function should return + * same buffer or new buffer with old frame "painted" into it. + * if pic.data[0] == NULL must behave like get_buffer(). + * - encoding: unused + * - decoding: set by lavc, user can override + */ + int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic); + + /** + * number of bits which should be loaded into the rc buffer before decoding starts + * - encoding: set by user. + * - decoding: unused + */ + int rc_initial_buffer_occupancy; + + /** + * + * - encoding: set by user. + * - decoding: unused + */ + int inter_threshold; + + /** + * CODEC_FLAG2_*. + * - encoding: set by user. + * - decoding: set by user. + */ + int flags2; + + /** + * simulates errors in the bitstream to test error concealment. + * - encoding: set by user. + * - decoding: unused. + */ + int error_rate; + + /** + * MP3 antialias algorithm, see FF_AA_* below. + * - encoding: unused + * - decoding: set by user + */ + int antialias_algo; +#define FF_AA_AUTO 0 +#define FF_AA_FASTINT 1 //not implemented yet +#define FF_AA_INT 2 +#define FF_AA_FLOAT 3 + /** + * Quantizer noise shaping. + * - encoding: set by user + * - decoding: unused + */ + int quantizer_noise_shaping; + + /** + * Thread count. + * is used to decide how many independent tasks should be passed to execute() + * - encoding: set by user + * - decoding: set by user + */ + int thread_count; + + /** + * the codec may call this to execute several independent things.
it will return only after + * finishing all tasks, the user may replace this with some multithreaded implementation, the + * default implementation will execute the parts serially + * @param count the number of things to execute + * - encoding: set by lavc, user can override + * - decoding: set by lavc, user can override + */ + int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count); + + /** + * Thread opaque. + * can be used by execute() to store some per AVCodecContext stuff. + * - encoding: set by execute() + * - decoding: set by execute() + */ + void *thread_opaque; + + /** + * Motion estimation threshold. under which no motion estimation is + * performed, but instead the user specified motion vectors are used + * + * - encoding: set by user + * - decoding: unused + */ + int me_threshold; + + /** + * Macroblock threshold. under which the user specified macroblock types will be used + * - encoding: set by user + * - decoding: unused + */ + int mb_threshold; + + /** + * precision of the intra dc coefficient - 8. + * - encoding: set by user + * - decoding: unused + */ + int intra_dc_precision; + + /** + * noise vs. sse weight for the nsse comparison function. + * - encoding: set by user + * - decoding: unused + */ + int nsse_weight; + + /** + * number of macroblock rows at the top which are skipped. + * - encoding: unused + * - decoding: set by user + */ + int skip_top; + + /** + * number of macroblock rows at the bottom which are skipped. + * - encoding: unused + * - decoding: set by user + */ + int skip_bottom; + + /** + * profile + * - encoding: set by user + * - decoding: set by lavc + */ + int profile; +#define FF_PROFILE_UNKNOWN -99 + + /** + * level + * - encoding: set by user + * - decoding: set by lavc + */ + int level; +#define FF_LEVEL_UNKNOWN -99 + + /** + * low resolution decoding.
1-> 1/2 size, 2->1/4 size + * - encoding: unused + * - decoding: set by user + */ + int lowres; + + /** + * bitstream width / height. may be different from width/height if lowres + * or other things are used + * - encoding: unused + * - decoding: set by user before init if known, codec should override / dynamically change if needed + */ + int coded_width, coded_height; + + /** + * frame skip threshold + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_threshold; + + /** + * frame skip factor + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_factor; + + /** + * frame skip exponent + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_exp; + + /** + * frame skip comparison function + * - encoding: set by user. + * - decoding: unused + */ + int frame_skip_cmp; + + /** + * border processing masking. raises the quantizer for mbs on the borders + * of the picture. + * - encoding: set by user + * - decoding: unused + */ + float border_masking; + + /** + * minimum MB lagrange multiplier. + * - encoding: set by user. + * - decoding: unused + */ + int mb_lmin; + + /** + * maximum MB lagrange multiplier. + * - encoding: set by user. + * - decoding: unused + */ + int mb_lmax; + + /** + * + * - encoding: set by user. + * - decoding: unused + */ + int me_penalty_compensation; + + /** + * + * - encoding: unused + * - decoding: set by user. + */ + enum AVDiscard skip_loop_filter; + + /** + * + * - encoding: unused + * - decoding: set by user. + */ + enum AVDiscard skip_idct; + + /** + * + * - encoding: unused + * - decoding: set by user. + */ + enum AVDiscard skip_frame; + + /** + * + * - encoding: set by user. + * - decoding: unused + */ + int bidir_refine; + + /** + * + * - encoding: set by user. + * - decoding: unused + */ + int brd_scale; + + /** + * constant rate factor - quality-based VBR - values ~correspond to qps + * - encoding: set by user.
+ * - decoding: unused + */ + int crf; + + /** + * constant quantization parameter rate control method + * - encoding: set by user. + * - decoding: unused + */ + int cqp; + + /** + * minimum gop size + * - encoding: set by user. + * - decoding: unused + */ + int keyint_min; + + /** + * number of reference frames + * - encoding: set by user. + * - decoding: unused + */ + int refs; + + /** + * chroma qp offset from luma + * - encoding: set by user. + * - decoding: unused + */ + int chromaoffset; + + /** + * influences how often b-frames are used + * - encoding: set by user. + * - decoding: unused + */ + int bframebias; + + /** + * trellis RD quantization + * - encoding: set by user. + * - decoding: unused + */ + int trellis; + + /** + * reduce fluctuations in qp (before curve compression) + * - encoding: set by user. + * - decoding: unused + */ + float complexityblur; + + /** + * in-loop deblocking filter alphac0 parameter + * alpha is in the range -6...6 + * - encoding: set by user. + * - decoding: unused + */ + int deblockalpha; + + /** + * in-loop deblocking filter beta parameter + * beta is in the range -6...6 + * - encoding: set by user. + * - decoding: unused + */ + int deblockbeta; + + /** + * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4 + * (see the X264_PART_* flags below) + * - encoding: set by user. + * - decoding: unused + */ + int partitions; +#define X264_PART_I4X4 0x001 /* Analyse i4x4 */ +#define X264_PART_I8X8 0x002 /* Analyse i8x8 (requires 8x8 transform) */ +#define X264_PART_P8X8 0x010 /* Analyse p16x8, p8x16 and p8x8 */ +#define X264_PART_P4X4 0x020 /* Analyse p8x4, p4x8, p4x4 */ +#define X264_PART_B8X8 0x100 /* Analyse b16x8, b8x16 and b8x8 */ + + /** + * direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal) + * - encoding: set by user. + * - decoding: unused + */ + int directpred; + + /** + * audio cutoff bandwidth (0 means "automatic") . Currently used only by FAAC + * - encoding: set by user.
+ * - decoding: unused + */ + int cutoff; + + /** + * multiplied by qscale for each frame and added to scene_change_score + * - encoding: set by user. + * - decoding: unused + */ + int scenechange_factor; +} AVCodecContext; + +/** + * AVCodec. + */ +typedef struct AVCodec { + const char *name; + enum CodecType type; + enum CodecID id; + int priv_data_size; + int (*init)(AVCodecContext *); + int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data); + int (*close)(AVCodecContext *); + int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, + uint8_t *buf, int buf_size); + int capabilities; +#if LIBAVCODEC_VERSION_INT < ((50<<16)+(0<<8)+0) + void *dummy; // FIXME remove next time we break binary compatibility +#endif + struct AVCodec *next; + void (*flush)(AVCodecContext *); + const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0} + const enum PixelFormat *pix_fmts; ///array of supported pixel formats, or NULL if unknown, array is terminated by -1 +} AVCodec; + +/** + * four components are given, that's all. + * the last component is alpha + */ +typedef struct AVPicture { + uint8_t *data[4]; + int linesize[4]; ///< number of bytes per line +} AVPicture; + +/** + * AVPaletteControl + * This structure defines a method for communicating palette changes + * between a demuxer and a decoder.
+ */ +#define AVPALETTE_SIZE 1024 +#define AVPALETTE_COUNT 256 +typedef struct AVPaletteControl { + + /* demuxer sets this to 1 to indicate the palette has changed; + * decoder resets to 0 */ + int palette_changed; + + /* 4-byte ARGB palette entries, stored in native byte order; note that + * the individual palette components should be on a 8-bit scale; if + * the palette data comes from an IBM VGA native format, the component + * data is probably 6 bits in size and needs to be scaled */ + unsigned int palette[AVPALETTE_COUNT]; + +} AVPaletteControl; + +typedef struct AVSubtitleRect { + uint16_t x; + uint16_t y; + uint16_t w; + uint16_t h; + uint16_t nb_colors; + int linesize; + uint32_t *rgba_palette; + uint8_t *bitmap; +} AVSubtitleRect; + +typedef struct AVSubtitle { + uint16_t format; /* 0 = graphics */ + uint32_t start_display_time; /* relative to packet pts, in ms */ + uint32_t end_display_time; /* relative to packet pts, in ms */ + uint32_t num_rects; + AVSubtitleRect *rects; +} AVSubtitle; + +extern AVCodec ac3_encoder; +extern AVCodec mp2_encoder; +extern AVCodec mp3lame_encoder; +extern AVCodec oggvorbis_encoder; +extern AVCodec oggtheora_encoder; +extern AVCodec faac_encoder; +extern AVCodec xvid_encoder; +extern AVCodec mpeg1video_encoder; +extern AVCodec mpeg2video_encoder; +extern AVCodec h261_encoder; +extern AVCodec h263_encoder; +extern AVCodec h263p_encoder; +extern AVCodec flv_encoder; +extern AVCodec rv10_encoder; +extern AVCodec rv20_encoder; +extern AVCodec dvvideo_encoder; +extern AVCodec mjpeg_encoder; +extern AVCodec ljpeg_encoder; +extern AVCodec jpegls_encoder; +extern AVCodec png_encoder; +extern AVCodec ppm_encoder; +extern AVCodec pgm_encoder; +extern AVCodec pgmyuv_encoder; +extern AVCodec pbm_encoder; +extern AVCodec pam_encoder; +extern AVCodec mpeg4_encoder; +extern AVCodec msmpeg4v1_encoder; +extern AVCodec msmpeg4v2_encoder; +extern AVCodec msmpeg4v3_encoder; +extern AVCodec wmv1_encoder; +extern AVCodec wmv2_encoder; +extern
AVCodec huffyuv_encoder; +extern AVCodec ffvhuff_encoder; +extern AVCodec h264_encoder; +extern AVCodec asv1_encoder; +extern AVCodec asv2_encoder; +extern AVCodec vcr1_encoder; +extern AVCodec ffv1_encoder; +extern AVCodec snow_encoder; +extern AVCodec mdec_encoder; +extern AVCodec zlib_encoder; +extern AVCodec sonic_encoder; +extern AVCodec sonic_ls_encoder; +extern AVCodec svq1_encoder; +extern AVCodec x264_encoder; + +extern AVCodec h263_decoder; +extern AVCodec h261_decoder; +extern AVCodec mpeg4_decoder; +extern AVCodec msmpeg4v1_decoder; +extern AVCodec msmpeg4v2_decoder; +extern AVCodec msmpeg4v3_decoder; +extern AVCodec wmv1_decoder; +extern AVCodec wmv2_decoder; +extern AVCodec vc9_decoder; +extern AVCodec wmv3_decoder; +extern AVCodec mpeg1video_decoder; +extern AVCodec mpeg2video_decoder; +extern AVCodec mpegvideo_decoder; +extern AVCodec mpeg_xvmc_decoder; +extern AVCodec h263i_decoder; +extern AVCodec flv_decoder; +extern AVCodec rv10_decoder; +extern AVCodec rv20_decoder; +extern AVCodec rv30_decoder; +extern AVCodec rv40_decoder; +extern AVCodec svq1_decoder; +extern AVCodec svq3_decoder; +extern AVCodec dvvideo_decoder; +extern AVCodec wmav1_decoder; +extern AVCodec wmav2_decoder; +extern AVCodec mjpeg_decoder; +extern AVCodec mjpegb_decoder; +extern AVCodec sp5x_decoder; +extern AVCodec png_decoder; +extern AVCodec mp2_decoder; +extern AVCodec mp3_decoder; +extern AVCodec mp3adu_decoder; +extern AVCodec mp3on4_decoder; +extern AVCodec qdm2_decoder; +extern AVCodec cook_decoder; +extern AVCodec truespeech_decoder; +extern AVCodec tta_decoder; +extern AVCodec mace3_decoder; +extern AVCodec mace6_decoder; +extern AVCodec huffyuv_decoder; +extern AVCodec ffvhuff_decoder; +extern AVCodec oggvorbis_decoder; +extern AVCodec oggtheora_decoder; +extern AVCodec cyuv_decoder; +extern AVCodec h264_decoder; +extern AVCodec indeo3_decoder; +extern AVCodec vp3_decoder; +extern AVCodec theora_decoder; +extern AVCodec amr_nb_decoder; +extern AVCodec 
amr_nb_encoder; +extern AVCodec amr_wb_encoder; +extern AVCodec amr_wb_decoder; +extern AVCodec aac_decoder; +extern AVCodec mpeg4aac_decoder; +extern AVCodec asv1_decoder; +extern AVCodec asv2_decoder; +extern AVCodec vcr1_decoder; +extern AVCodec cljr_decoder; +extern AVCodec ffv1_decoder; +extern AVCodec snow_decoder; +extern AVCodec fourxm_decoder; +extern AVCodec mdec_decoder; +extern AVCodec roq_decoder; +extern AVCodec interplay_video_decoder; +extern AVCodec xan_wc3_decoder; +extern AVCodec rpza_decoder; +extern AVCodec cinepak_decoder; +extern AVCodec msrle_decoder; +extern AVCodec msvideo1_decoder; +extern AVCodec vqa_decoder; +extern AVCodec idcin_decoder; +extern AVCodec eightbps_decoder; +extern AVCodec smc_decoder; +extern AVCodec flic_decoder; +extern AVCodec vmdvideo_decoder; +extern AVCodec vmdaudio_decoder; +extern AVCodec truemotion1_decoder; +extern AVCodec truemotion2_decoder; +extern AVCodec mszh_decoder; +extern AVCodec zlib_decoder; +extern AVCodec ra_144_decoder; +extern AVCodec ra_288_decoder; +extern AVCodec roq_dpcm_decoder; +extern AVCodec interplay_dpcm_decoder; +extern AVCodec xan_dpcm_decoder; +extern AVCodec sol_dpcm_decoder; +extern AVCodec sonic_decoder; +extern AVCodec qtrle_decoder; +extern AVCodec flac_decoder; +extern AVCodec tscc_decoder; +extern AVCodec cscd_decoder; +extern AVCodec nuv_decoder; +extern AVCodec ulti_decoder; +extern AVCodec qdraw_decoder; +extern AVCodec xl_decoder; +extern AVCodec qpeg_decoder; +extern AVCodec shorten_decoder; +extern AVCodec loco_decoder; +extern AVCodec wnv1_decoder; +extern AVCodec aasc_decoder; +extern AVCodec alac_decoder; +extern AVCodec ws_snd1_decoder; +extern AVCodec indeo2_decoder; +extern AVCodec vorbis_decoder; +extern AVCodec fraps_decoder; +extern AVCodec libgsm_encoder; +extern AVCodec libgsm_decoder; +extern AVCodec bmp_decoder; +extern AVCodec mmvideo_decoder; +extern AVCodec zmbv_decoder; +extern AVCodec avs_decoder; +extern AVCodec smacker_decoder; +extern AVCodec 
smackaud_decoder; +extern AVCodec kmvc_decoder; + +/* pcm codecs */ +#define PCM_CODEC(id, name) \ +extern AVCodec name ## _decoder; \ +extern AVCodec name ## _encoder + +PCM_CODEC(CODEC_ID_PCM_S32LE, pcm_s32le); +PCM_CODEC(CODEC_ID_PCM_S32BE, pcm_s32be); +PCM_CODEC(CODEC_ID_PCM_U32LE, pcm_u32le); +PCM_CODEC(CODEC_ID_PCM_U32BE, pcm_u32be); +PCM_CODEC(CODEC_ID_PCM_S24LE, pcm_s24le); +PCM_CODEC(CODEC_ID_PCM_S24BE, pcm_s24be); +PCM_CODEC(CODEC_ID_PCM_U24LE, pcm_u24le); +PCM_CODEC(CODEC_ID_PCM_U24BE, pcm_u24be); +PCM_CODEC(CODEC_ID_PCM_S24DAUD, pcm_s24daud); +PCM_CODEC(CODEC_ID_PCM_S16LE, pcm_s16le); +PCM_CODEC(CODEC_ID_PCM_S16BE, pcm_s16be); +PCM_CODEC(CODEC_ID_PCM_U16LE, pcm_u16le); +PCM_CODEC(CODEC_ID_PCM_U16BE, pcm_u16be); +PCM_CODEC(CODEC_ID_PCM_S8, pcm_s8); +PCM_CODEC(CODEC_ID_PCM_U8, pcm_u8); +PCM_CODEC(CODEC_ID_PCM_ALAW, pcm_alaw); +PCM_CODEC(CODEC_ID_PCM_MULAW, pcm_mulaw); + +/* adpcm codecs */ + +PCM_CODEC(CODEC_ID_ADPCM_IMA_QT, adpcm_ima_qt); +PCM_CODEC(CODEC_ID_ADPCM_IMA_WAV, adpcm_ima_wav); +PCM_CODEC(CODEC_ID_ADPCM_IMA_DK3, adpcm_ima_dk3); +PCM_CODEC(CODEC_ID_ADPCM_IMA_DK4, adpcm_ima_dk4); +PCM_CODEC(CODEC_ID_ADPCM_IMA_WS, adpcm_ima_ws); +PCM_CODEC(CODEC_ID_ADPCM_SMJPEG, adpcm_ima_smjpeg); +PCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms); +PCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); +PCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); +PCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); +PCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); +PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726); +PCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); +PCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); +PCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2); + +#undef PCM_CODEC + +/* dummy raw video codec */ +extern AVCodec rawvideo_encoder; +extern AVCodec rawvideo_decoder; + +/* the following codecs use external GPL libs */ +extern AVCodec ac3_decoder; +extern AVCodec dts_decoder; + +/* 
subtitles */ +extern AVCodec dvdsub_encoder; +extern AVCodec dvdsub_decoder; +extern AVCodec dvbsub_encoder; +extern AVCodec dvbsub_decoder; + +/* resample.c */ + +struct ReSampleContext; +struct AVResampleContext; + +typedef struct ReSampleContext ReSampleContext; + +ReSampleContext *audio_resample_init(int output_channels, int input_channels, + int output_rate, int input_rate); +int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples); +void audio_resample_close(ReSampleContext *s); + +struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff); +int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx); +void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance); +void av_resample_close(struct AVResampleContext *c); + +/* YUV420 format is assumed ! */ + +struct ImgReSampleContext; + +typedef struct ImgReSampleContext ImgReSampleContext; + +ImgReSampleContext *img_resample_init(int output_width, int output_height, + int input_width, int input_height); + +ImgReSampleContext *img_resample_full_init(int owidth, int oheight, + int iwidth, int iheight, + int topBand, int bottomBand, + int leftBand, int rightBand, + int padtop, int padbottom, + int padleft, int padright); + + +void img_resample(ImgReSampleContext *s, + AVPicture *output, const AVPicture *input); + +void img_resample_close(ImgReSampleContext *s); + +/** + * Allocate memory for a picture. Call avpicture_free to free it. + * + * @param picture the picture to be filled in. + * @param pix_fmt the format of the picture. + * @param width the width of the picture. + * @param height the height of the picture. + * @return 0 if successful, -1 if not. + */ +int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height); + +/* Free a picture previously allocated by avpicture_alloc. 
*/ +void avpicture_free(AVPicture *picture); + +int avpicture_fill(AVPicture *picture, uint8_t *ptr, + int pix_fmt, int width, int height); +int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, + unsigned char *dest, int dest_size); +int avpicture_get_size(int pix_fmt, int width, int height); +void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift); +const char *avcodec_get_pix_fmt_name(int pix_fmt); +void avcodec_set_dimensions(AVCodecContext *s, int width, int height); +enum PixelFormat avcodec_get_pix_fmt(const char* name); +unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat p); + +#define FF_LOSS_RESOLUTION 0x0001 /* loss due to resolution change */ +#define FF_LOSS_DEPTH 0x0002 /* loss due to color depth change */ +#define FF_LOSS_COLORSPACE 0x0004 /* loss due to color space conversion */ +#define FF_LOSS_ALPHA 0x0008 /* loss of alpha bits */ +#define FF_LOSS_COLORQUANT 0x0010 /* loss due to color quantization */ +#define FF_LOSS_CHROMA 0x0020 /* loss of chroma (e.g. 
rgb to gray conversion) */ + +int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt, + int has_alpha); +int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt, + int has_alpha, int *loss_ptr); + +#define FF_ALPHA_TRANSP 0x0001 /* image has some totally transparent pixels */ +#define FF_ALPHA_SEMI_TRANSP 0x0002 /* image has some transparent pixels */ +int img_get_alpha_info(const AVPicture *src, + int pix_fmt, int width, int height); + +/* convert among pixel formats */ +int img_convert(AVPicture *dst, int dst_pix_fmt, + const AVPicture *src, int pix_fmt, + int width, int height); + +/* deinterlace a picture */ +int avpicture_deinterlace(AVPicture *dst, const AVPicture *src, + int pix_fmt, int width, int height); + +/* external high level API */ + +extern AVCodec *first_avcodec; + +/* returns LIBAVCODEC_VERSION_INT constant */ +unsigned avcodec_version(void); +/* returns LIBAVCODEC_BUILD constant */ +unsigned avcodec_build(void); +void avcodec_init(void); + +void register_avcodec(AVCodec *format); +AVCodec *avcodec_find_encoder(enum CodecID id); +AVCodec *avcodec_find_encoder_by_name(const char *name); +AVCodec *avcodec_find_decoder(enum CodecID id); +AVCodec *avcodec_find_decoder_by_name(const char *name); +void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode); + +void avcodec_get_context_defaults(AVCodecContext *s); +AVCodecContext *avcodec_alloc_context(void); +void avcodec_get_frame_defaults(AVFrame *pic); +AVFrame *avcodec_alloc_frame(void); + +int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic); +void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic); +int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic); +void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height); +int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h); +enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt); + +int 
avcodec_thread_init(AVCodecContext *s, int thread_count); +void avcodec_thread_free(AVCodecContext *s); +int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count); +int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count); +//FIXME func typedef + +/** + * opens / inits the AVCodecContext. + * not thread save! + */ +int avcodec_open(AVCodecContext *avctx, AVCodec *codec); + +int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples, + int *frame_size_ptr, + uint8_t *buf, int buf_size); +int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture, + int *got_picture_ptr, + uint8_t *buf, int buf_size); +int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub, + int *got_sub_ptr, + const uint8_t *buf, int buf_size); +int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata, + int *data_size_ptr, + uint8_t *buf, int buf_size); +int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, + const short *samples); +int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size, + const AVFrame *pict); +int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size, + const AVSubtitle *sub); + +int avcodec_close(AVCodecContext *avctx); + +void avcodec_register_all(void); + +void avcodec_flush_buffers(AVCodecContext *avctx); + +void avcodec_default_free_buffers(AVCodecContext *s); + +/* misc usefull functions */ + +/** + * returns a single letter to describe the picture type + */ +char av_get_pict_type_char(int pict_type); + + +/* frame parsing */ +typedef struct AVCodecParserContext { + void *priv_data; + struct AVCodecParser *parser; + int64_t frame_offset; /* offset of the current frame */ + int64_t cur_offset; /* current offset + (incremented by each av_parser_parse()) */ + int64_t last_frame_offset; /* offset of the last frame */ + /* video info */ + int pict_type; /* 
XXX: put it back in AVCodecContext */ + int repeat_pict; /* XXX: put it back in AVCodecContext */ + int64_t pts; /* pts of the current frame */ + int64_t dts; /* dts of the current frame */ + + /* private data */ + int64_t last_pts; + int64_t last_dts; + int fetch_timestamp; + +#define AV_PARSER_PTS_NB 4 + int cur_frame_start_index; + int64_t cur_frame_offset[AV_PARSER_PTS_NB]; + int64_t cur_frame_pts[AV_PARSER_PTS_NB]; + int64_t cur_frame_dts[AV_PARSER_PTS_NB]; + + int flags; +#define PARSER_FLAG_COMPLETE_FRAMES 0x0001 +} AVCodecParserContext; + +typedef struct AVCodecParser { + int codec_ids[5]; /* several codec IDs are permitted */ + int priv_data_size; + int (*parser_init)(AVCodecParserContext *s); + int (*parser_parse)(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size); + void (*parser_close)(AVCodecParserContext *s); + int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size); + struct AVCodecParser *next; +} AVCodecParser; + +extern AVCodecParser *av_first_parser; + +void av_register_codec_parser(AVCodecParser *parser); +AVCodecParserContext *av_parser_init(int codec_id); +int av_parser_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, + int64_t pts, int64_t dts); +int av_parser_change(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe); +void av_parser_close(AVCodecParserContext *s); + +extern AVCodecParser mpegvideo_parser; +extern AVCodecParser mpeg4video_parser; +extern AVCodecParser h261_parser; +extern AVCodecParser h263_parser; +extern AVCodecParser h264_parser; +extern AVCodecParser mjpeg_parser; +extern AVCodecParser pnm_parser; +extern AVCodecParser mpegaudio_parser; +extern AVCodecParser ac3_parser; +extern AVCodecParser dvdsub_parser; +extern AVCodecParser dvbsub_parser; +extern AVCodecParser 
aac_parser; + +/* memory */ +void *av_malloc(unsigned int size); +void *av_mallocz(unsigned int size); +void *av_realloc(void *ptr, unsigned int size); +void av_free(void *ptr); +char *av_strdup(const char *s); +void av_freep(void *ptr); +void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size); +/* for static data only */ +/* call av_free_static to release all staticaly allocated tables */ +void av_free_static(void); +void *av_mallocz_static(unsigned int size); +void *av_realloc_static(void *ptr, unsigned int size); + +/* add by bero : in adx.c */ +int is_adx(const unsigned char *buf,size_t bufsize); + +void img_copy(AVPicture *dst, const AVPicture *src, + int pix_fmt, int width, int height); + +int img_crop(AVPicture *dst, const AVPicture *src, + int pix_fmt, int top_band, int left_band); + +int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, + int padtop, int padbottom, int padleft, int padright, int *color); + +/* av_log API */ + +#include + +#define AV_LOG_QUIET -1 +#define AV_LOG_ERROR 0 +#define AV_LOG_INFO 1 +#define AV_LOG_DEBUG 2 + +#ifdef __GNUC__ +extern void av_log(void*, int level, const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 3, 4))); +#else +extern void av_log(void*, int level, const char *fmt, ...); +#endif + +extern void av_vlog(void*, int level, const char *fmt, va_list); +extern int av_log_get_level(void); +extern void av_log_set_level(int); +extern void av_log_set_callback(void (*)(void*, int, const char*, va_list)); + +/* endian macros */ +#if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) +#define BE_16(x) ((((uint8_t*)(x))[0] << 8) | ((uint8_t*)(x))[1]) +#define BE_32(x) ((((uint8_t*)(x))[0] << 24) | \ + (((uint8_t*)(x))[1] << 16) | \ + (((uint8_t*)(x))[2] << 8) | \ + ((uint8_t*)(x))[3]) +#define LE_16(x) ((((uint8_t*)(x))[1] << 8) | ((uint8_t*)(x))[0]) +#define LE_32(x) ((((uint8_t*)(x))[3] << 24) | \ + (((uint8_t*)(x))[2] << 16) | \ + (((uint8_t*)(x))[1] << 8) | \ + ((uint8_t*)(x))[0]) +#endif + +extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v); + +#ifdef __cplusplus +} +#endif + +#endif /* AVCODEC_H */ diff --git a/mpeg4/src/libavcodec/avs.c b/mpeg4/src/libavcodec/avs.c new file mode 100644 index 0000000000000000000000000000000000000000..557e9becb32c130ac075f57c27d5de300b4906d1 --- /dev/null +++ b/mpeg4/src/libavcodec/avs.c @@ -0,0 +1,158 @@ +/* + * AVS video decoder. + * Copyright (c) 2006 Aurelien Jacobs + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "bitstream.h" + + +typedef struct { + AVFrame picture; +} avs_context_t; + +typedef enum { + AVS_VIDEO = 0x01, + AVS_AUDIO = 0x02, + AVS_PALETTE = 0x03, + AVS_GAME_DATA = 0x04, +} avs_block_type_t; + +typedef enum { + AVS_I_FRAME = 0x00, + AVS_P_FRAME_3X3 = 0x01, + AVS_P_FRAME_2X2 = 0x02, + AVS_P_FRAME_2X3 = 0x03, +} avs_video_sub_type_t; + + +static int +avs_decode_frame(AVCodecContext * avctx, + void *data, int *data_size, uint8_t * buf, int buf_size) +{ + avs_context_t *const avs = avctx->priv_data; + AVFrame *picture = data; + AVFrame *const p = (AVFrame *) & avs->picture; + uint8_t *table, *vect, *out; + int i, j, x, y, stride, vect_w = 3, vect_h = 3; + int sub_type; + avs_block_type_t type; + GetBitContext change_map; + + if (avctx->reget_buffer(avctx, p)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + p->reference = 1; + p->pict_type = FF_P_TYPE; + p->key_frame = 0; + + out = avs->picture.data[0]; + stride = avs->picture.linesize[0]; + + sub_type = buf[0]; + type = buf[1]; + buf += 4; + + if (type == AVS_PALETTE) { + int first, last; + uint32_t *pal = (uint32_t *) avs->picture.data[1]; + + first = LE_16(buf); + last = first + LE_16(buf + 2); + buf += 4; + for (i=first; ipict_type = FF_I_TYPE; + p->key_frame = 1; + case AVS_P_FRAME_3X3: + vect_w = 3; + vect_h = 3; + break; + + case AVS_P_FRAME_2X2: + vect_w = 2; + vect_h = 2; + break; + + case AVS_P_FRAME_2X3: + vect_w = 2; + vect_h = 3; + break; + + default: + return -1; + } + + table = buf + (256 * vect_w * vect_h); + if (sub_type != AVS_I_FRAME) { + int map_size = ((318 / vect_w + 7) / 8) * (198 / vect_h); + init_get_bits(&change_map, table, map_size); + table += map_size; + } + + for (y=0; y<198; y+=vect_h) { + for 
(x=0; x<318; x+=vect_w) { + if (sub_type == AVS_I_FRAME || get_bits1(&change_map)) { + vect = &buf[*table++ * (vect_w * vect_h)]; + for (j=0; jpicture; + *data_size = sizeof(AVPicture); + + return buf_size; +} + +static int avs_decode_init(AVCodecContext * avctx) +{ + avctx->pix_fmt = PIX_FMT_PAL8; + return 0; +} + +AVCodec avs_decoder = { + "avs", + CODEC_TYPE_VIDEO, + CODEC_ID_AVS, + sizeof(avs_context_t), + avs_decode_init, + NULL, + NULL, + avs_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/beosthread.c b/mpeg4/src/libavcodec/beosthread.c new file mode 100644 index 0000000000000000000000000000000000000000..54595f24151a1a5ac46f1e2889ed402f128a038e --- /dev/null +++ b/mpeg4/src/libavcodec/beosthread.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2004 François Revol + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ +//#define DEBUG + +#include "avcodec.h" +#include "common.h" + +#include + +typedef struct ThreadContext{ + AVCodecContext *avctx; + thread_id thread; + sem_id work_sem; + sem_id done_sem; + int (*func)(AVCodecContext *c, void *arg); + void *arg; + int ret; +}ThreadContext; + +// it's odd Be never patented that :D +struct benaphore { + vint32 atom; + sem_id sem; +}; +static inline int lock_ben(struct benaphore *ben) +{ + if (atomic_add(&ben->atom, 1) > 0) + return acquire_sem(ben->sem); + return B_OK; +} +static inline int unlock_ben(struct benaphore *ben) +{ + if (atomic_add(&ben->atom, -1) > 1) + return release_sem(ben->sem); + return B_OK; +} + +static struct benaphore av_thread_lib_ben; + +static int32 ff_thread_func(void *v){ + ThreadContext *c= v; + + for(;;){ +//printf("thread_func %X enter wait\n", (int)v); fflush(stdout); + acquire_sem(c->work_sem); +//printf("thread_func %X after wait (func=%X)\n", (int)v, (int)c->func); fflush(stdout); + if(c->func) + c->ret= c->func(c->avctx, c->arg); + else + return 0; +//printf("thread_func %X signal complete\n", (int)v); fflush(stdout); + release_sem(c->done_sem); + } + + return B_OK; +} + +/** + * free what has been allocated by avcodec_thread_init(). 
+ * must be called after decoding has finished, especially dont call while avcodec_thread_execute() is running + */ +void avcodec_thread_free(AVCodecContext *s){ + ThreadContext *c= s->thread_opaque; + int i; + int32 ret; + + for(i=0; ithread_count; i++){ + + c[i].func= NULL; + release_sem(c[i].work_sem); + wait_for_thread(c[i].thread, &ret); + if(c[i].work_sem > B_OK) delete_sem(c[i].work_sem); + if(c[i].done_sem > B_OK) delete_sem(c[i].done_sem); + } + + av_freep(&s->thread_opaque); +} + +int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void **arg, int *ret, int count){ + ThreadContext *c= s->thread_opaque; + int i; + + assert(s == c->avctx); + assert(count <= s->thread_count); + + /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */ + + for(i=0; ithread_count= thread_count; + + assert(!s->thread_opaque); + c= av_mallocz(sizeof(ThreadContext)*thread_count); + s->thread_opaque= c; + + for(i=0; iexecute= avcodec_thread_execute; + + return 0; +fail: + avcodec_thread_free(s); + return -1; +} + +/* provide a mean to serialize calls to avcodec_*() for thread safety. */ + +int avcodec_thread_lock_lib(void) +{ + return lock_ben(&av_thread_lib_ben); +} + +int avcodec_thread_unlock_lib(void) +{ + return unlock_ben(&av_thread_lib_ben); +} + +/* our versions of _init and _fini (which are called by those actually from crt.o) */ + +void initialize_after(void) +{ + av_thread_lib_ben.atom = 0; + av_thread_lib_ben.sem = create_sem(0, "libavcodec benaphore"); +} + +void uninitialize_before(void) +{ + delete_sem(av_thread_lib_ben.sem); +} + + + diff --git a/mpeg4/src/libavcodec/bitstream.c b/mpeg4/src/libavcodec/bitstream.c new file mode 100644 index 0000000000000000000000000000000000000000..49c6ece1b0d0de57f2cacf2ccb5dc52791b3047d --- /dev/null +++ b/mpeg4/src/libavcodec/bitstream.c @@ -0,0 +1,293 @@ +/* + * Common bit i/o utils + * Copyright (c) 2000, 2001 Fabrice Bellard. 
+ * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * alternative bitstream reader & writer by Michael Niedermayer + */ + +/** + * @file bitstream.c + * bitstream api. + */ + +#include "avcodec.h" +#include "bitstream.h" + +void align_put_bits(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + put_bits(s,( - s->index) & 7,0); +#else + put_bits(s,s->bit_left & 7,0); +#endif +} + +void ff_put_string(PutBitContext * pbc, char *s, int put_zero) +{ + while(*s){ + put_bits(pbc, 8, *s); + s++; + } + if(put_zero) + put_bits(pbc, 8, 0); +} + +/* bit input functions */ + +/** + * reads 0-32 bits. + */ +unsigned int get_bits_long(GetBitContext *s, int n){ + if(n<=17) return get_bits(s, n); + else{ + int ret= get_bits(s, 16) << (n-16); + return ret | get_bits(s, n-16); + } +} + +/** + * shows 0-32 bits. 
+ */ +unsigned int show_bits_long(GetBitContext *s, int n){ + if(n<=17) return show_bits(s, n); + else{ + GetBitContext gb= *s; + int ret= get_bits_long(s, n); + *s= gb; + return ret; + } +} + +void align_get_bits(GetBitContext *s) +{ + int n= (-get_bits_count(s)) & 7; + if(n) skip_bits(s, n); +} + +int check_marker(GetBitContext *s, const char *msg) +{ + int bit= get_bits1(s); + if(!bit) + av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); + + return bit; +} + +/* VLC decoding */ + +//#define DEBUG_VLC + +#define GET_DATA(v, table, i, wrap, size) \ +{\ + const uint8_t *ptr = (const uint8_t *)table + i * wrap;\ + switch(size) {\ + case 1:\ + v = *(const uint8_t *)ptr;\ + break;\ + case 2:\ + v = *(const uint16_t *)ptr;\ + break;\ + default:\ + v = *(const uint32_t *)ptr;\ + break;\ + }\ +} + + +static int alloc_table(VLC *vlc, int size, int use_static) +{ + int index; + index = vlc->table_size; + vlc->table_size += size; + if (vlc->table_size > vlc->table_allocated) { + vlc->table_allocated += (1 << vlc->bits); + if(use_static) + vlc->table = av_realloc_static(vlc->table, + sizeof(VLC_TYPE) * 2 * vlc->table_allocated); + else + vlc->table = av_realloc(vlc->table, + sizeof(VLC_TYPE) * 2 * vlc->table_allocated); + if (!vlc->table) + return -1; + } + return index; +} + +static int build_table(VLC *vlc, int table_nb_bits, + int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + uint32_t code_prefix, int n_prefix, int flags) +{ + int i, j, k, n, table_size, table_index, nb, n1, index, code_prefix2; + uint32_t code; + VLC_TYPE (*table)[2]; + + table_size = 1 << table_nb_bits; + table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_STATIC); +#ifdef DEBUG_VLC + printf("new table index=%d size=%d code_prefix=%x n=%d\n", + table_index, table_size, code_prefix, n_prefix); +#endif + if (table_index < 0) + return -1; + table = &vlc->table[table_index]; + + for(i=0;i=32 ? 
0xffffffff : (1 << n_prefix)-1); + else + code_prefix2= code >> n; + if (n > 0 && code_prefix2 == code_prefix) { + if (n <= table_nb_bits) { + /* no need to add another table */ + j = (code << (table_nb_bits - n)) & (table_size - 1); + nb = 1 << (table_nb_bits - n); + for(k=0;k> n_prefix) + (k<> ((flags & INIT_VLC_LE) ? n_prefix : n)) & ((1 << table_nb_bits) - 1); +#ifdef DEBUG_VLC + printf("%4x: n=%d (subtable)\n", + j, n); +#endif + /* compute table size */ + n1 = -table[j][1]; //bits + if (n > n1) + n1 = n; + table[j][1] = -n1; //bits + } + } + } + + /* second pass : fill auxillary tables recursively */ + for(i=0;i table_nb_bits) { + n = table_nb_bits; + table[i][1] = -n; //bits + } + index = build_table(vlc, n, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + (flags & INIT_VLC_LE) ? (code_prefix | (i << n_prefix)) : ((code_prefix << table_nb_bits) | i), + n_prefix + table_nb_bits, flags); + if (index < 0) + return -1; + /* note: realloc has been done, so reload tables */ + table = &vlc->table[table_index]; + table[i][0] = index; //code + } + } + return table_index; +} + + +/* Build VLC decoding tables suitable for use with get_vlc(). + + 'nb_bits' set thee decoding table size (2^nb_bits) entries. The + bigger it is, the faster is the decoding. But it should not be too + big to save memory and L1 cache. '9' is a good compromise. + + 'nb_codes' : number of vlcs codes + + 'bits' : table which gives the size (in bits) of each vlc code. + + 'codes' : table which gives the bit pattern of of each vlc code. + + 'xxx_wrap' : give the number of bytes between each entry of the + 'bits' or 'codes' tables. + + 'xxx_size' : gives the number of bytes of each entry of the 'bits' + or 'codes' tables. + + 'wrap' and 'size' allows to use any memory configuration and types + (byte/word/long) to store the 'bits' and 'codes' tables. + + 'use_static' should be set to 1 for tables, which should be freed + with av_free_static(), 0 if free_vlc() will be used. 
+*/ +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + int use_static) +{ + vlc->bits = nb_bits; + if(!use_static) { + vlc->table = NULL; + vlc->table_allocated = 0; + vlc->table_size = 0; + } else { + /* Static tables are initially always NULL, return + if vlc->table != NULL to avoid double allocation */ + if(vlc->table) + return 0; + } + +#ifdef DEBUG_VLC + printf("build table nb_codes=%d\n", nb_codes); +#endif + + if (build_table(vlc, nb_bits, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + 0, 0, use_static) < 0) { + av_free(vlc->table); + return -1; + } + return 0; +} + + +void free_vlc(VLC *vlc) +{ + av_free(vlc->table); +} + diff --git a/mpeg4/src/libavcodec/bitstream.h b/mpeg4/src/libavcodec/bitstream.h new file mode 100644 index 0000000000000000000000000000000000000000..b425049900fdf42620d74a3c6c00f44c83db3382 --- /dev/null +++ b/mpeg4/src/libavcodec/bitstream.h @@ -0,0 +1,873 @@ +/** + * @file bitstream.h + * bitstream api header. + */ + +#ifndef BITSTREAM_H +#define BITSTREAM_H + +//#define ALT_BITSTREAM_WRITER +//#define ALIGNED_BITSTREAM_WRITER + +#define ALT_BITSTREAM_READER +//#define LIBMPEG2_BITSTREAM_READER +//#define A32_BITSTREAM_READER +#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO + +extern const uint8_t ff_reverse[256]; + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +// avoid +32 for shift optimization (gcc should do that ...) 
+/* x86 versions of the macros in the #else branch: shift right by (32 - s).
+   NOTE(review): the shift count passed is (uint8_t)(-s); this presumably relies on
+   the CPU using only the low 5 bits of the count -- confirm. */
+static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
+    asm ("sarl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
+    asm ("shrl %1, %0\n\t"
+         : "+r" (a)
+         : "ic" ((uint8_t)(-s))
+    );
+    return a;
+}
+#else
+#    define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#    define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
+/* bit output */
+
+/* buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;
+    int index;              /* position in bits from the start of buf */
+#else
+    uint32_t bit_buf;       /* bits collected but not yet stored */
+    int bit_left;           /* free bits remaining in bit_buf (32 == empty) */
+    uint8_t *buf, *buf_ptr, *buf_end;
+#endif
+} PutBitContext;
+
+/* Prepare s for writing into buffer[0 .. buffer_size-1]; a negative size is
+   treated as "no buffer" (buf == buf_end == NULL).
+   NOTE(review): the ALT_BITSTREAM_WRITER branch stores to buf[0..3]
+   unconditionally, i.e. also for a NULL or shorter buffer. */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if(buffer_size < 0) {
+        buffer_size = 0;
+        buffer = NULL;
+    }
+
+    s->buf = buffer;
+    s->buf_end = s->buf + buffer_size;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index=0;
+    ((uint32_t*)(s->buf))[0]=0;
+//    memset(buffer, 0, buffer_size);
+#else
+    s->buf_ptr = s->buf;
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+/* return the number of bits output */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    return s->index;
+#else
+    return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left;
+#endif
+}
+
+/* declared ahead of flush_put_bits(), which calls align_put_bits() when
+   ALT_BITSTREAM_WRITER is defined */
+void align_put_bits(PutBitContext *s);
+void ff_put_string(PutBitContext * pbc, char *s, int put_zero);
+
+/* pad the end of the output stream with zeros */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+    /* bit_left == 32 means nothing is pending; shifting the 32 bit bit_buf
+       by 32 would be undefined, and there is nothing to store anyway */
+    if (s->bit_left < 32)
+        s->bit_buf<<= s->bit_left;
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+        s->bit_left+=8;
+    }
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;
+#ifdef
ALT_BITSTREAM_READER + int index; +#elif defined LIBMPEG2_BITSTREAM_READER + uint8_t *buffer_ptr; + uint32_t cache; + int bit_count; +#elif defined A32_BITSTREAM_READER + uint32_t *buffer_ptr; + uint32_t cache0; + uint32_t cache1; + int bit_count; +#endif + int size_in_bits; +} GetBitContext; + +#define VLC_TYPE int16_t + +typedef struct VLC { + int bits; + VLC_TYPE (*table)[2]; ///< code, bits + int table_size, table_allocated; +} VLC; + +typedef struct RL_VLC_ELEM { + int16_t level; + int8_t len; + uint8_t run; +} RL_VLC_ELEM; + +#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) +#define UNALIGNED_STORES_ARE_BAD +#endif + +/* used to avoid missaligned exceptions on some archs (alpha, ...) */ +#if defined(ARCH_X86) || defined(ARCH_X86_64) +# define unaligned32(a) (*(const uint32_t*)(a)) +#else +# ifdef __GNUC__ +static inline uint32_t unaligned32(const void *v) { + struct Unaligned { + uint32_t i; + } __attribute__((packed)); + + return ((const struct Unaligned *) v)->i; +} +# elif defined(__DECC) +static inline uint32_t unaligned32(const void *v) { + return *(const __unaligned uint32_t *) v; +} +# else +static inline uint32_t unaligned32(const void *v) { + return *(const uint32_t *) v; +} +# endif +#endif //!ARCH_X86 + +#ifndef ALT_BITSTREAM_WRITER +static inline void put_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf; + int bit_left; + +#ifdef STATS + st_out_bit_counts[st_current_index] += n; +#endif + // printf("put_bits=%d %x\n", n, value); + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_left = s->bit_left; + + // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < bit_left) { + bit_buf = (bit_buf<> (n - bit_left); +#ifdef UNALIGNED_STORES_ARE_BAD + if (3 & (intptr_t) s->buf_ptr) { + s->buf_ptr[0] = bit_buf >> 24; + s->buf_ptr[1] = bit_buf >> 16; + s->buf_ptr[2] = bit_buf >> 8; + s->buf_ptr[3] = bit_buf ; + } else +#endif + *(uint32_t *)s->buf_ptr = 
be2me_32(bit_buf); + //printf("bitbuf = %08x\n", bit_buf); + s->buf_ptr+=4; + bit_left+=32 - n; + bit_buf = value; + } + + s->bit_buf = bit_buf; + s->bit_left = bit_left; +} +#endif + + +#ifdef ALT_BITSTREAM_WRITER +static inline void put_bits(PutBitContext *s, int n, unsigned int value) +{ +# ifdef ALIGNED_BITSTREAM_WRITER +# if defined(ARCH_X86) || defined(ARCH_X86_64) + asm volatile( + "movl %0, %%ecx \n\t" + "xorl %%eax, %%eax \n\t" + "shrdl %%cl, %1, %%eax \n\t" + "shrl %%cl, %1 \n\t" + "movl %0, %%ecx \n\t" + "shrl $3, %%ecx \n\t" + "andl $0xFFFFFFFC, %%ecx \n\t" + "bswapl %1 \n\t" + "orl %1, (%2, %%ecx) \n\t" + "bswapl %%eax \n\t" + "addl %3, %0 \n\t" + "movl %%eax, 4(%2, %%ecx) \n\t" + : "=&r" (s->index), "=&r" (value) + : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) + : "%eax", "%ecx" + ); +# else + int index= s->index; + uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5); + + value<<= 32-n; + + ptr[0] |= be2me_32(value>>(index&31)); + ptr[1] = be2me_32(value<<(32-(index&31))); +//if(n>24) printf("%d %d\n", n, value); + index+= n; + s->index= index; +# endif +# else //ALIGNED_BITSTREAM_WRITER +# if defined(ARCH_X86) || defined(ARCH_X86_64) + asm volatile( + "movl $7, %%ecx \n\t" + "andl %0, %%ecx \n\t" + "addl %3, %%ecx \n\t" + "negl %%ecx \n\t" + "shll %%cl, %1 \n\t" + "bswapl %1 \n\t" + "movl %0, %%ecx \n\t" + "shrl $3, %%ecx \n\t" + "orl %1, (%%ecx, %2) \n\t" + "addl %3, %0 \n\t" + "movl $0, 4(%%ecx, %2) \n\t" + : "=&r" (s->index), "=&r" (value) + : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) + : "%ecx" + ); +# else + int index= s->index; + uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); + + ptr[0] |= be2me_32(value<<(32-n-(index&7) )); + ptr[1] = 0; +//if(n>24) printf("%d %d\n", n, value); + index+= n; + s->index= index; +# endif +# endif //!ALIGNED_BITSTREAM_WRITER +} +#endif + + +static inline uint8_t* pbBufPtr(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + return s->buf + (s->index>>3); +#else + return s->buf_ptr; 
+#endif +} + +/** + * + * PutBitContext must be flushed & aligned to a byte boundary before calling this. + */ +static inline void skip_put_bytes(PutBitContext *s, int n){ + assert((put_bits_count(s)&7)==0); +#ifdef ALT_BITSTREAM_WRITER + FIXME may need some cleaning of the buffer + s->index += n<<3; +#else + assert(s->bit_left==32); + s->buf_ptr += n; +#endif +} + +/** + * skips the given number of bits. + * must only be used if the actual values in the bitstream dont matter + */ +static inline void skip_put_bits(PutBitContext *s, int n){ +#ifdef ALT_BITSTREAM_WRITER + s->index += n; +#else + s->bit_left -= n; + s->buf_ptr-= s->bit_left>>5; + s->bit_left &= 31; +#endif +} + +/** + * Changes the end of the buffer. + */ +static inline void set_put_bits_buffer_size(PutBitContext *s, int size){ + s->buf_end= s->buf + size; +} + +/* Bitstream reader API docs: +name + abritary name which is used as prefix for the internal variables + +gb + getbitcontext + +OPEN_READER(name, gb) + loads gb into local variables + +CLOSE_READER(name, gb) + stores local vars in gb + +UPDATE_CACHE(name, gb) + refills the internal cache from the bitstream + after this call at least MIN_CACHE_BITS will be available, + +GET_CACHE(name, gb) + will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit) + +SHOW_UBITS(name, gb, num) + will return the next num bits + +SHOW_SBITS(name, gb, num) + will return the next num bits and do sign extension + +SKIP_BITS(name, gb, num) + will skip over the next num bits + note, this is equivalent to SKIP_CACHE; SKIP_COUNTER + +SKIP_CACHE(name, gb, num) + will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER) + +SKIP_COUNTER(name, gb, num) + will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS) + +LAST_SKIP_CACHE(name, gb, num) + will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing + 
+LAST_SKIP_BITS(name, gb, num) + is equivalent to SKIP_LAST_CACHE; SKIP_COUNTER + +for examples see get_bits, show_bits, skip_bits, get_vlc +*/ + +static inline int unaligned32_be(const void *v) +{ +#ifdef CONFIG_ALIGN + const uint8_t *p=v; + return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); +#else + return be2me_32( unaligned32(v)); //original +#endif +} + +static inline int unaligned32_le(const void *v) +{ +#ifdef CONFIG_ALIGN + const uint8_t *p=v; + return (((p[3]<<8) | p[2])<<16) | (p[1]<<8) | (p[0]); +#else + return le2me_32( unaligned32(v)); //original +#endif +} + +#ifdef ALT_BITSTREAM_READER +# define MIN_CACHE_BITS 25 + +# define OPEN_READER(name, gb)\ + int name##_index= (gb)->index;\ + int name##_cache= 0;\ + +# define CLOSE_READER(name, gb)\ + (gb)->index= name##_index;\ + +# ifdef ALT_BITSTREAM_READER_LE +# define UPDATE_CACHE(name, gb)\ + name##_cache= unaligned32_le( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\ + +# define SKIP_CACHE(name, gb, num)\ + name##_cache >>= (num); +# else +# define UPDATE_CACHE(name, gb)\ + name##_cache= unaligned32_be( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\ + +# define SKIP_CACHE(name, gb, num)\ + name##_cache <<= (num); +# endif + +// FIXME name? 
+# define SKIP_COUNTER(name, gb, num)\ + name##_index += (num);\ + +# define SKIP_BITS(name, gb, num)\ + {\ + SKIP_CACHE(name, gb, num)\ + SKIP_COUNTER(name, gb, num)\ + }\ + +# define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num) +# define LAST_SKIP_CACHE(name, gb, num) ; + +# ifdef ALT_BITSTREAM_READER_LE +# define SHOW_UBITS(name, gb, num)\ + ((name##_cache) & (NEG_USR32(0xffffffff,num))) +# else +# define SHOW_UBITS(name, gb, num)\ + NEG_USR32(name##_cache, num) +# endif + +# define SHOW_SBITS(name, gb, num)\ + NEG_SSR32(name##_cache, num) + +# define GET_CACHE(name, gb)\ + ((uint32_t)name##_cache) + +static inline int get_bits_count(GetBitContext *s){ + return s->index; +} +#elif defined LIBMPEG2_BITSTREAM_READER +//libmpeg2 like reader + +# define MIN_CACHE_BITS 17 + +# define OPEN_READER(name, gb)\ + int name##_bit_count=(gb)->bit_count;\ + int name##_cache= (gb)->cache;\ + uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\ + +# define CLOSE_READER(name, gb)\ + (gb)->bit_count= name##_bit_count;\ + (gb)->cache= name##_cache;\ + (gb)->buffer_ptr= name##_buffer_ptr;\ + +#ifdef LIBMPEG2_BITSTREAM_READER_HACK + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count >= 0){\ + name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\ + name##_buffer_ptr += 2;\ + name##_bit_count-= 16;\ + }\ + +#else + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count >= 0){\ + name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\ + name##_buffer_ptr+=2;\ + name##_bit_count-= 16;\ + }\ + +#endif + +# define SKIP_CACHE(name, gb, num)\ + name##_cache <<= (num);\ + +# define SKIP_COUNTER(name, gb, num)\ + name##_bit_count += (num);\ + +# define SKIP_BITS(name, gb, num)\ + {\ + SKIP_CACHE(name, gb, num)\ + SKIP_COUNTER(name, gb, num)\ + }\ + +# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num) +# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num) + +# define SHOW_UBITS(name, gb, num)\ + 
NEG_USR32(name##_cache, num) + +# define SHOW_SBITS(name, gb, num)\ + NEG_SSR32(name##_cache, num) + +# define GET_CACHE(name, gb)\ + ((uint32_t)name##_cache) + +static inline int get_bits_count(GetBitContext *s){ + return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count; +} + +#elif defined A32_BITSTREAM_READER + +# define MIN_CACHE_BITS 32 + +# define OPEN_READER(name, gb)\ + int name##_bit_count=(gb)->bit_count;\ + uint32_t name##_cache0= (gb)->cache0;\ + uint32_t name##_cache1= (gb)->cache1;\ + uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\ + +# define CLOSE_READER(name, gb)\ + (gb)->bit_count= name##_bit_count;\ + (gb)->cache0= name##_cache0;\ + (gb)->cache1= name##_cache1;\ + (gb)->buffer_ptr= name##_buffer_ptr;\ + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count > 0){\ + const uint32_t next= be2me_32( *name##_buffer_ptr );\ + name##_cache0 |= NEG_USR32(next,name##_bit_count);\ + name##_cache1 |= next<buffer_ptr - s->buffer)*8 - 32 + s->bit_count; +} + +#endif + +/** + * read mpeg1 dc style vlc (sign bit + mantisse with no MSB). + * if MSB not set it is negative + * @param n length in bits + * @author BERO + */ +static inline int get_xbits(GetBitContext *s, int n){ + register int sign; + register int32_t cache; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + cache = GET_CACHE(re,s); + sign=(~cache)>>31; + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return (NEG_USR32(sign ^ cache, n) ^ sign) - sign; +} + +static inline int get_sbits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_SBITS(re, s, n); + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return tmp; +} + +/** + * reads 0-17 bits. 
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't + */ +static inline unsigned int get_bits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_UBITS(re, s, n); + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return tmp; +} + +unsigned int get_bits_long(GetBitContext *s, int n); + +/** + * shows 0-17 bits. + * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't + */ +static inline unsigned int show_bits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_UBITS(re, s, n); +// CLOSE_READER(re, s) + return tmp; +} + +unsigned int show_bits_long(GetBitContext *s, int n); + +static inline void skip_bits(GetBitContext *s, int n){ + //Note gcc seems to optimize this to s->index+=n for the ALT_READER :)) + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) +} + +static inline unsigned int get_bits1(GetBitContext *s){ +#ifdef ALT_BITSTREAM_READER + int index= s->index; + uint8_t result= s->buffer[ index>>3 ]; +#ifdef ALT_BITSTREAM_READER_LE + result>>= (index&0x07); + result&= 1; +#else + result<<= (index&0x07); + result>>= 8 - 1; +#endif + index++; + s->index= index; + + return result; +#else + return get_bits(s, 1); +#endif +} + +static inline unsigned int show_bits1(GetBitContext *s){ + return show_bits(s, 1); +} + +static inline void skip_bits1(GetBitContext *s){ + skip_bits(s, 1); +} + +/** + * init GetBitContext. 
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits + * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end + * @param bit_size the size of the buffer in bits + */ +static inline void init_get_bits(GetBitContext *s, + const uint8_t *buffer, int bit_size) +{ + int buffer_size= (bit_size+7)>>3; + if(buffer_size < 0 || bit_size < 0) { + buffer_size = bit_size = 0; + buffer = NULL; + } + + s->buffer= buffer; + s->size_in_bits= bit_size; + s->buffer_end= buffer + buffer_size; +#ifdef ALT_BITSTREAM_READER + s->index=0; +#elif defined LIBMPEG2_BITSTREAM_READER +#ifdef LIBMPEG2_BITSTREAM_READER_HACK + if ((int)buffer&1) { + /* word alignment */ + s->cache = (*buffer++)<<24; + s->buffer_ptr = buffer; + s->bit_count = 16-8; + } else +#endif + { + s->buffer_ptr = buffer; + s->bit_count = 16; + s->cache = 0; + } +#elif defined A32_BITSTREAM_READER + s->buffer_ptr = (uint32_t*)buffer; + s->bit_count = 32; + s->cache0 = 0; + s->cache1 = 0; +#endif + { + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + UPDATE_CACHE(re, s) + CLOSE_READER(re, s) + } +#ifdef A32_BITSTREAM_READER + s->cache1 = 0; +#endif +} + +int check_marker(GetBitContext *s, const char *msg); +void align_get_bits(GetBitContext *s); +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + int flags); +#define INIT_VLC_USE_STATIC 1 +#define INIT_VLC_LE 2 +void free_vlc(VLC *vlc); + +/** + * + * if the vlc code is invalid and max_depth=1 than no bits will be removed + * if the vlc code is invalid and max_depth>1 than the number of bits removed + * is undefined + */ +#define GET_VLC(code, name, gb, table, bits, max_depth)\ +{\ + int n, index, nb_bits;\ +\ + index= SHOW_UBITS(name, gb, bits);\ + code = table[index][0];\ + n = table[index][1];\ +\ + if(max_depth > 1 && n < 0){\ + LAST_SKIP_BITS(name, gb, bits)\ + 
UPDATE_CACHE(name, gb)\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + code;\ + code = table[index][0];\ + n = table[index][1];\ + if(max_depth > 2 && n < 0){\ + LAST_SKIP_BITS(name, gb, nb_bits)\ + UPDATE_CACHE(name, gb)\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + code;\ + code = table[index][0];\ + n = table[index][1];\ + }\ + }\ + SKIP_BITS(name, gb, n)\ +} + +#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\ +{\ + int n, index, nb_bits;\ +\ + index= SHOW_UBITS(name, gb, bits);\ + level = table[index].level;\ + n = table[index].len;\ +\ + if(max_depth > 1 && n < 0){\ + SKIP_BITS(name, gb, bits)\ + if(need_update){\ + UPDATE_CACHE(name, gb)\ + }\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + level;\ + level = table[index].level;\ + n = table[index].len;\ + }\ + run= table[index].run;\ + SKIP_BITS(name, gb, n)\ +} + + +/** + * parses a vlc code, faster then get_vlc() + * @param bits is the number of bits which will be read at once, must be + * identical to nb_bits in init_vlc() + * @param max_depth is the number of times bits bits must be readed to completly + * read the longest vlc code + * = (max_vlc_length + bits - 1) / bits + */ +static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], + int bits, int max_depth) +{ + int code; + + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + + GET_VLC(code, re, s, table, bits, max_depth) + + CLOSE_READER(re, s) + return code; +} + +//#define TRACE + +#ifdef TRACE +#include "avcodec.h" +static inline void print_bin(int bits, int n){ + int i; + + for(i=n-1; i>=0; i--){ + av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1); + } + for(i=n; i<24; i++) + av_log(NULL, AV_LOG_DEBUG, " "); +} + +static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){ + int r= get_bits(s, n); + + print_bin(r, n); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, 
func, line); + return r; +} +static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){ + int show= show_bits(s, 24); + int pos= get_bits_count(s); + int r= get_vlc2(s, table, bits, max_depth); + int len= get_bits_count(s) - pos; + int bits2= show>>(24-len); + + print_bin(bits2, len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line); + return r; +} +static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){ + int show= show_bits(s, n); + int r= get_xbits(s, n); + + print_bin(show, n); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line); + return r; +} + +#define get_bits(s, n) get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_bits1(s) get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_vlc(s, vlc) get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__) + +#define tprintf(...) av_log(NULL, AV_LOG_DEBUG, __VA_ARGS__) + +#else //TRACE +#define tprintf(...) 
{} +#endif + +static inline int decode012(GetBitContext *gb){ + int n; + n = get_bits1(gb); + if (n == 0) + return 0; + else + return get_bits1(gb) + 1; +} + +#endif /* BITSTREAM_H */ diff --git a/mpeg4/src/libavcodec/bmp.c b/mpeg4/src/libavcodec/bmp.c new file mode 100644 index 0000000000000000000000000000000000000000..dc9030634728b5b0020e3117fb54f8cd3b7389d4 --- /dev/null +++ b/mpeg4/src/libavcodec/bmp.c @@ -0,0 +1,251 @@ +/* + * BMP image format + * Copyright (c) 2005 Mans Rullgard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "bitstream.h" +#include "bswap.h" + +typedef struct BMPContext { + AVFrame picture; +} BMPContext; + +#define BMP_RGB 0 +#define BMP_RLE8 1 +#define BMP_RLE4 2 +#define BMP_BITFIELDS 3 + +#define read16(bits) bswap_16(get_bits(bits, 16)) +#define read32(bits) bswap_32(get_bits_long(bits, 32)) + +static int bmp_decode_init(AVCodecContext *avctx){ + BMPContext *s = avctx->priv_data; + + avcodec_get_frame_defaults((AVFrame*)&s->picture); + avctx->coded_frame = (AVFrame*)&s->picture; + + return 0; +} + +static int bmp_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + BMPContext *s = avctx->priv_data; + AVFrame *picture = data; + AVFrame *p = &s->picture; + GetBitContext bits; + unsigned int fsize, hsize; + int width, height; + unsigned int depth; + unsigned int comp; + unsigned int ihsize; + int i, j, n, linesize; + uint32_t rgb[3]; + uint8_t *ptr; + int dsize; + + if(buf_size < 14){ + av_log(avctx, AV_LOG_ERROR, "buf size too small (%d)\n", buf_size); + return -1; + } + + init_get_bits(&bits, buf, buf_size); + + if(get_bits(&bits, 16) != 0x424d){ /* 'BM' */ + av_log(avctx, AV_LOG_ERROR, "bad magic number\n"); + return -1; + } + + fsize = read32(&bits); + if(buf_size < fsize){ + av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n", + buf_size, fsize); + return -1; + } + + skip_bits(&bits, 16); /* reserved1 */ + skip_bits(&bits, 16); /* reserved2 */ + + hsize = read32(&bits); /* header size */ + if(fsize <= hsize){ + av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n", + fsize, hsize); + return -1; + } + + ihsize = read32(&bits); /* more header size */ + if(ihsize + 14 > hsize){ + av_log(avctx, AV_LOG_ERROR, "invalid header size %d\n", hsize); + 
return -1; + } + + width = read32(&bits); + height = read32(&bits); + + if(read16(&bits) != 1){ /* planes */ + av_log(avctx, AV_LOG_ERROR, "invalid BMP header\n"); + return -1; + } + + depth = read16(&bits); + + if(ihsize > 16) + comp = read32(&bits); + else + comp = BMP_RGB; + + if(comp != BMP_RGB && comp != BMP_BITFIELDS){ + av_log(avctx, AV_LOG_ERROR, "BMP coding %d not supported\n", comp); + return -1; + } + + if(comp == BMP_BITFIELDS){ + skip_bits(&bits, 20 * 8); + rgb[0] = read32(&bits); + rgb[1] = read32(&bits); + rgb[2] = read32(&bits); + } + + avctx->codec_id = CODEC_ID_BMP; + avctx->width = width; + avctx->height = height > 0? height: -height; + + avctx->pix_fmt = PIX_FMT_NONE; + + switch(depth){ + case 32: + if(comp == BMP_BITFIELDS){ + rgb[0] = (rgb[0] >> 15) & 3; + rgb[1] = (rgb[1] >> 15) & 3; + rgb[2] = (rgb[2] >> 15) & 3; + + if(rgb[0] + rgb[1] + rgb[2] != 3 || + rgb[0] == rgb[1] || rgb[0] == rgb[2] || rgb[1] == rgb[2]){ + break; + } + } else { + rgb[0] = 2; + rgb[1] = 1; + rgb[2] = 0; + } + + avctx->pix_fmt = PIX_FMT_BGR24; + break; + case 24: + avctx->pix_fmt = PIX_FMT_BGR24; + break; + case 16: + if(comp == BMP_RGB) + avctx->pix_fmt = PIX_FMT_RGB555; + break; + default: + av_log(avctx, AV_LOG_ERROR, "depth %d not supported\n", depth); + return -1; + } + + if(avctx->pix_fmt == PIX_FMT_NONE){ + av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n"); + return -1; + } + + p->reference = 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->pict_type = FF_I_TYPE; + p->key_frame = 1; + + buf += hsize; + dsize = buf_size - hsize; + + n = avctx->width * (depth / 8); + + if(n * avctx->height > dsize){ + av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n", + dsize, n * avctx->height); + return -1; + } + + if(height > 0){ + ptr = p->data[0] + (avctx->height - 1) * p->linesize[0]; + linesize = -p->linesize[0]; + } else { + ptr = p->data[0]; + linesize = p->linesize[0]; + } + + 
switch(depth){ + case 24: + for(i = 0; i < avctx->height; i++){ + memcpy(ptr, buf, n); + buf += n; + ptr += linesize; + } + break; + case 16: + for(i = 0; i < avctx->height; i++){ + uint16_t *src = (uint16_t *) buf; + uint16_t *dst = (uint16_t *) ptr; + + for(j = 0; j < avctx->width; j++) + *dst++ = le2me_16(*src++); + + buf += n; + ptr += linesize; + } + break; + case 32: + for(i = 0; i < avctx->height; i++){ + uint8_t *src = buf; + uint8_t *dst = ptr; + + for(j = 0; j < avctx->width; j++){ + dst[0] = src[rgb[2]]; + dst[1] = src[rgb[1]]; + dst[2] = src[rgb[0]]; + dst += 3; + src += 4; + } + + buf += n; + ptr += linesize; + } + break; + default: + av_log(avctx, AV_LOG_ERROR, "BMP decoder is broken\n"); + return -1; + } + + *picture = s->picture; + *data_size = sizeof(AVPicture); + + return buf_size; +} + +AVCodec bmp_decoder = { + "bmp", + CODEC_TYPE_VIDEO, + CODEC_ID_BMP, + sizeof(BMPContext), + bmp_decode_init, + NULL, + NULL, + bmp_decode_frame +}; diff --git a/mpeg4/src/libavcodec/cabac.c b/mpeg4/src/libavcodec/cabac.c new file mode 100644 index 0000000000000000000000000000000000000000..88790a960ce8752ab16664573dc8d03210f3bbc4 --- /dev/null +++ b/mpeg4/src/libavcodec/cabac.c @@ -0,0 +1,234 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cabac.c + * Context Adaptive Binary Arithmetic Coder. + */ + +#include + +#include "common.h" +#include "bitstream.h" +#include "cabac.h" + +const uint8_t ff_h264_lps_range[64][4]= { +{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, +{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, +{ 95,116,137,158}, { 90,110,130,150}, { 85,104,123,142}, { 81, 99,117,135}, +{ 77, 94,111,128}, { 73, 89,105,122}, { 69, 85,100,116}, { 66, 80, 95,110}, +{ 62, 76, 90,104}, { 59, 72, 86, 99}, { 56, 69, 81, 94}, { 53, 65, 77, 89}, +{ 51, 62, 73, 85}, { 48, 59, 69, 80}, { 46, 56, 66, 76}, { 43, 53, 63, 72}, +{ 41, 50, 59, 69}, { 39, 48, 56, 65}, { 37, 45, 54, 62}, { 35, 43, 51, 59}, +{ 33, 41, 48, 56}, { 32, 39, 46, 53}, { 30, 37, 43, 50}, { 29, 35, 41, 48}, +{ 27, 33, 39, 45}, { 26, 31, 37, 43}, { 24, 30, 35, 41}, { 23, 28, 33, 39}, +{ 22, 27, 32, 37}, { 21, 26, 30, 35}, { 20, 24, 29, 33}, { 19, 23, 27, 31}, +{ 18, 22, 26, 30}, { 17, 21, 25, 28}, { 16, 20, 23, 27}, { 15, 19, 22, 25}, +{ 14, 18, 21, 24}, { 14, 17, 20, 23}, { 13, 16, 19, 22}, { 12, 15, 18, 21}, +{ 12, 14, 17, 20}, { 11, 14, 16, 19}, { 11, 13, 15, 18}, { 10, 12, 15, 17}, +{ 10, 12, 14, 16}, { 9, 11, 13, 15}, { 9, 11, 12, 14}, { 8, 10, 12, 14}, +{ 8, 9, 11, 13}, { 7, 9, 11, 12}, { 7, 9, 10, 12}, { 7, 8, 10, 11}, +{ 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, +}; + +const uint8_t ff_h264_mps_state[64]= { + 1, 2, 3, 4, 5, 6, 7, 8, + 9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24, + 25,26,27,28,29,30,31,32, + 33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48, + 49,50,51,52,53,54,55,56, + 57,58,59,60,61,62,62,63, +}; + +const uint8_t ff_h264_lps_state[64]= { + 0, 0, 1, 2, 2, 4, 4, 5, + 6, 7, 8, 9, 9,11,11,12, 
+ 13,13,15,15,16,16,18,18, + 19,19,21,21,22,22,23,24, + 24,25,26,26,27,27,28,29, + 29,30,30,30,31,32,32,33, + 33,33,34,34,35,35,35,36, + 36,36,37,37,37,38,38,63, +}; + +const uint8_t ff_h264_norm_shift[256]= { + 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + +/** + * + * @param buf_size size of buf in bits + */ +void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){ + init_put_bits(&c->pb, buf, buf_size); + + c->low= 0; + c->range= 0x1FE; + c->outstanding_count= 0; +#ifdef STRICT_LIMITS + c->sym_count =0; +#endif + + c->pb.bit_left++; //avoids firstBitFlag +} + +/** + * + * @param buf_size size of buf in bits + */ +void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){ + c->bytestream_start= + c->bytestream= buf; + c->bytestream_end= buf + buf_size; + +#if CABAC_BITS == 16 + c->low = (*c->bytestream++)<<18; + c->low+= (*c->bytestream++)<<10; +#else + c->low = (*c->bytestream++)<<10; +#endif + c->low+= ((*c->bytestream++)<<2) + 2; + c->range= 0x1FE<<(CABAC_BITS + 1); +} + +void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], + uint8_t const *mps_state, uint8_t const *lps_state, int state_count){ + int i, j; + + for(i=0; ilps_range[2*i+0][j+4]= + c->lps_range[2*i+1][j+4]= lps_range[i][j]; + } + + c->mps_state[2*i+0]= 2*mps_state[i]; + c->mps_state[2*i+1]= 2*mps_state[i]+1; + + if( i ){ + c->lps_state[2*i+0]= 2*lps_state[i]; + c->lps_state[2*i+1]= 2*lps_state[i]+1; + }else{ + c->lps_state[2*i+0]= 1; 
+ c->lps_state[2*i+1]= 0; + } + } +} + +#if 0 //selftest +#define SIZE 10240 + +#include "avcodec.h" + +int main(){ + CABACContext c; + uint8_t b[9*SIZE]; + uint8_t r[9*SIZE]; + int i; + uint8_t state[10]= {0}; + + ff_init_cabac_encoder(&c, b, SIZE); + ff_init_cabac_states(&c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); + + for(i=0; i + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cabac.h + * Context Adaptive Binary Arithmetic Coder. 
+ */ + + +#undef NDEBUG +#include + +#define CABAC_BITS 8 +#define CABAC_MASK ((1<pb, 1, b); + for(;c->outstanding_count; c->outstanding_count--){ + put_bits(&c->pb, 1, 1-b); + } +} + +static inline void renorm_cabac_encoder(CABACContext *c){ + while(c->range < 0x100){ + //FIXME optimize + if(c->low<0x100){ + put_cabac_bit(c, 0); + }else if(c->low<0x200){ + c->outstanding_count++; + c->low -= 0x100; + }else{ + put_cabac_bit(c, 1); + c->low -= 0x200; + } + + c->range+= c->range; + c->low += c->low; + } +} + +static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){ + int RangeLPS= c->lps_range[*state][c->range>>6]; + + if(bit == ((*state)&1)){ + c->range -= RangeLPS; + *state= c->mps_state[*state]; + }else{ + c->low += c->range - RangeLPS; + c->range = RangeLPS; + *state= c->lps_state[*state]; + } + + renorm_cabac_encoder(c); + +#ifdef STRICT_LIMITS + c->symCount++; +#endif +} + +static inline void put_cabac_static(CABACContext *c, int RangeLPS, int bit){ + assert(c->range > RangeLPS); + + if(!bit){ + c->range -= RangeLPS; + }else{ + c->low += c->range - RangeLPS; + c->range = RangeLPS; + } + + renorm_cabac_encoder(c); + +#ifdef STRICT_LIMITS + c->symCount++; +#endif +} + +/** + * @param bit 0 -> write zero bit, !=0 write one bit + */ +static inline void put_cabac_bypass(CABACContext *c, int bit){ + c->low += c->low; + + if(bit){ + c->low += c->range; + } +//FIXME optimize + if(c->low<0x200){ + put_cabac_bit(c, 0); + }else if(c->low<0x400){ + c->outstanding_count++; + c->low -= 0x200; + }else{ + put_cabac_bit(c, 1); + c->low -= 0x400; + } + +#ifdef STRICT_LIMITS + c->symCount++; +#endif +} + +/** + * + * @return the number of bytes written + */ +static inline int put_cabac_terminate(CABACContext *c, int bit){ + c->range -= 2; + + if(!bit){ + renorm_cabac_encoder(c); + }else{ + c->low += c->range; + c->range= 2; + + renorm_cabac_encoder(c); + + assert(c->low <= 0x1FF); + put_cabac_bit(c, c->low>>9); + put_bits(&c->pb, 2, ((c->low>>7)&3)|1); + + 
flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong + } + +#ifdef STRICT_LIMITS + c->symCount++; +#endif + + return (put_bits_count(&c->pb)+7)>>3; +} + +/** + * put (truncated) unary binarization. + */ +static inline void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){ + int i; + + assert(v <= max); + +#if 1 + for(i=0; i= m){ //FIXME optimize + put_cabac_bypass(c, 1); + v-= m; + m+= m; + } + put_cabac_bypass(c, 0); + while(m>>=1){ + put_cabac_bypass(c, v&m); + } + } + + if(is_signed) + put_cabac_bypass(c, sign); + } +} + +static void refill(CABACContext *c){ + if(c->bytestream <= c->bytestream_end) +#if CABAC_BITS == 16 + c->low+= ((c->bytestream[0]<<9) + (c->bytestream[1])<<1); +#else + c->low+= c->bytestream[0]<<1; +#endif + c->low -= CABAC_MASK; + c->bytestream+= CABAC_BITS/8; +} + +#if 0 /* all use commented */ +static void refill2(CABACContext *c){ + int i, x; + + x= c->low ^ (c->low-1); + i= 8 - ff_h264_norm_shift[x>>(CABAC_BITS+1)]; + + x= -CABAC_MASK; + + if(c->bytestream < c->bytestream_end) +#if CABAC_BITS == 16 + x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); +#else + x+= c->bytestream[0]<<1; +#endif + + c->low += x<bytestream+= CABAC_BITS/8; +} +#endif + +static inline void renorm_cabac_decoder(CABACContext *c){ + while(c->range < (0x200 << CABAC_BITS)){ + c->range+= c->range; + c->low+= c->low; + if(!(c->low & CABAC_MASK)) + refill(c); + } +} + +static inline void renorm_cabac_decoder_once(CABACContext *c){ + int mask= (c->range - (0x200 << CABAC_BITS))>>31; + c->range+= c->range&mask; + c->low += c->low &mask; + if(!(c->low & CABAC_MASK)) + refill(c); +} + +static inline int get_cabac(CABACContext *c, uint8_t * const state){ + int RangeLPS= c->lps_range[*state][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); + int bit, lps_mask attribute_unused; + + c->range -= RangeLPS; +#if 1 + if(c->low < c->range){ + bit= (*state)&1; + *state= c->mps_state[*state]; + renorm_cabac_decoder_once(c); + }else{ +// int 
shift= ff_h264_norm_shift[RangeLPS>>17]; + bit= ((*state)&1)^1; + c->low -= c->range; + *state= c->lps_state[*state]; + c->range = RangeLPS; + renorm_cabac_decoder(c); +/* c->range = RangeLPS<low <<= shift; + if(!(c->low & 0xFFFF)){ + refill2(c); + }*/ + } +#else + lps_mask= (c->range - c->low)>>31; + + c->low -= c->range & lps_mask; + c->range += (RangeLPS - c->range) & lps_mask; + + bit= ((*state)^lps_mask)&1; + *state= c->mps_state[(*state) - (128&lps_mask)]; + + lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)]; + c->range<<= lps_mask; + c->low <<= lps_mask; + if(!(c->low & CABAC_MASK)) + refill2(c); +#endif + + return bit; +} + +static inline int get_cabac_bypass(CABACContext *c){ + c->low += c->low; + + if(!(c->low & CABAC_MASK)) + refill(c); + + if(c->low < c->range){ + return 0; + }else{ + c->low -= c->range; + return 1; + } +} + +/** + * + * @return the number of bytes read or 0 if no end + */ +static inline int get_cabac_terminate(CABACContext *c){ + c->range -= 4<low < c->range){ + renorm_cabac_decoder_once(c); + return 0; + }else{ + return c->bytestream - c->bytestream_start; + } +} + +/** + * get (truncated) unnary binarization. 
+ */ +static inline int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){ + int i; + + for(i=0; i>=1){ + v+= v + get_cabac_bypass(c); + } + i += v; + + if(is_signed && get_cabac_bypass(c)){ + return -i; + }else + return i; +} diff --git a/mpeg4/src/libavcodec/cinepak.c b/mpeg4/src/libavcodec/cinepak.c new file mode 100644 index 0000000000000000000000000000000000000000..7976812315266cd8f14030ff36611b1a290feae3 --- /dev/null +++ b/mpeg4/src/libavcodec/cinepak.c @@ -0,0 +1,451 @@ +/* + * Cinepak Video Decoder + * Copyright (C) 2003 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cinepak.c + * Cinepak video decoder + * by Ewald Snel + * For more information on the Cinepak algorithm, visit: + * http://www.csse.monash.edu.au/~timf/ + */ + +#include +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" + + +typedef struct { + uint8_t y0, y1, y2, y3; + uint8_t u, v; +} cvid_codebook_t; + +#define MAX_STRIPS 32 + +typedef struct { + uint16_t id; + uint16_t x1, y1; + uint16_t x2, y2; + cvid_codebook_t v4_codebook[256]; + cvid_codebook_t v1_codebook[256]; +} cvid_strip_t; + +typedef struct CinepakContext { + + AVCodecContext *avctx; + DSPContext dsp; + AVFrame frame; + + unsigned char *data; + int size; + + int width, height; + + int palette_video; + cvid_strip_t strips[MAX_STRIPS]; + +} CinepakContext; + +static void cinepak_decode_codebook (cvid_codebook_t *codebook, + int chunk_id, int size, uint8_t *data) +{ + uint8_t *eod = (data + size); + uint32_t flag, mask; + int i, n; + + /* check if this chunk contains 4- or 6-element vectors */ + n = (chunk_id & 0x0400) ? 
4 : 6; + flag = 0; + mask = 0; + + for (i=0; i < 256; i++) { + if ((chunk_id & 0x0100) && !(mask >>= 1)) { + if ((data + 4) > eod) + break; + + flag = BE_32 (data); + data += 4; + mask = 0x80000000; + } + + if (!(chunk_id & 0x0100) || (flag & mask)) { + if ((data + n) > eod) + break; + + if (n == 6) { + codebook[i].y0 = *data++; + codebook[i].y1 = *data++; + codebook[i].y2 = *data++; + codebook[i].y3 = *data++; + codebook[i].u = 128 + *data++; + codebook[i].v = 128 + *data++; + } else { + /* this codebook type indicates either greyscale or + * palettized video; if palettized, U & V components will + * not be used so it is safe to set them to 128 for the + * benefit of greyscale rendering in YUV420P */ + codebook[i].y0 = *data++; + codebook[i].y1 = *data++; + codebook[i].y2 = *data++; + codebook[i].y3 = *data++; + codebook[i].u = 128; + codebook[i].v = 128; + } + } + } +} + +static int cinepak_decode_vectors (CinepakContext *s, cvid_strip_t *strip, + int chunk_id, int size, uint8_t *data) +{ + uint8_t *eod = (data + size); + uint32_t flag, mask; + cvid_codebook_t *codebook; + unsigned int x, y; + uint32_t iy[4]; + uint32_t iu[2]; + uint32_t iv[2]; + + flag = 0; + mask = 0; + + for (y=strip->y1; y < strip->y2; y+=4) { + + iy[0] = strip->x1 + (y * s->frame.linesize[0]); + iy[1] = iy[0] + s->frame.linesize[0]; + iy[2] = iy[1] + s->frame.linesize[0]; + iy[3] = iy[2] + s->frame.linesize[0]; + iu[0] = (strip->x1/2) + ((y/2) * s->frame.linesize[1]); + iu[1] = iu[0] + s->frame.linesize[1]; + iv[0] = (strip->x1/2) + ((y/2) * s->frame.linesize[2]); + iv[1] = iv[0] + s->frame.linesize[2]; + + for (x=strip->x1; x < strip->x2; x+=4) { + if ((chunk_id & 0x0100) && !(mask >>= 1)) { + if ((data + 4) > eod) + return -1; + + flag = BE_32 (data); + data += 4; + mask = 0x80000000; + } + + if (!(chunk_id & 0x0100) || (flag & mask)) { + if (!(chunk_id & 0x0200) && !(mask >>= 1)) { + if ((data + 4) > eod) + return -1; + + flag = BE_32 (data); + data += 4; + mask = 0x80000000; + } + + if 
((chunk_id & 0x0200) || (~flag & mask)) { + if (data >= eod) + return -1; + + codebook = &strip->v1_codebook[*data++]; + s->frame.data[0][iy[0] + 0] = codebook->y0; + s->frame.data[0][iy[0] + 1] = codebook->y0; + s->frame.data[0][iy[1] + 0] = codebook->y0; + s->frame.data[0][iy[1] + 1] = codebook->y0; + if (!s->palette_video) { + s->frame.data[1][iu[0]] = codebook->u; + s->frame.data[2][iv[0]] = codebook->v; + } + + s->frame.data[0][iy[0] + 2] = codebook->y1; + s->frame.data[0][iy[0] + 3] = codebook->y1; + s->frame.data[0][iy[1] + 2] = codebook->y1; + s->frame.data[0][iy[1] + 3] = codebook->y1; + if (!s->palette_video) { + s->frame.data[1][iu[0] + 1] = codebook->u; + s->frame.data[2][iv[0] + 1] = codebook->v; + } + + s->frame.data[0][iy[2] + 0] = codebook->y2; + s->frame.data[0][iy[2] + 1] = codebook->y2; + s->frame.data[0][iy[3] + 0] = codebook->y2; + s->frame.data[0][iy[3] + 1] = codebook->y2; + if (!s->palette_video) { + s->frame.data[1][iu[1]] = codebook->u; + s->frame.data[2][iv[1]] = codebook->v; + } + + s->frame.data[0][iy[2] + 2] = codebook->y3; + s->frame.data[0][iy[2] + 3] = codebook->y3; + s->frame.data[0][iy[3] + 2] = codebook->y3; + s->frame.data[0][iy[3] + 3] = codebook->y3; + if (!s->palette_video) { + s->frame.data[1][iu[1] + 1] = codebook->u; + s->frame.data[2][iv[1] + 1] = codebook->v; + } + + } else if (flag & mask) { + if ((data + 4) > eod) + return -1; + + codebook = &strip->v4_codebook[*data++]; + s->frame.data[0][iy[0] + 0] = codebook->y0; + s->frame.data[0][iy[0] + 1] = codebook->y1; + s->frame.data[0][iy[1] + 0] = codebook->y2; + s->frame.data[0][iy[1] + 1] = codebook->y3; + if (!s->palette_video) { + s->frame.data[1][iu[0]] = codebook->u; + s->frame.data[2][iv[0]] = codebook->v; + } + + codebook = &strip->v4_codebook[*data++]; + s->frame.data[0][iy[0] + 2] = codebook->y0; + s->frame.data[0][iy[0] + 3] = codebook->y1; + s->frame.data[0][iy[1] + 2] = codebook->y2; + s->frame.data[0][iy[1] + 3] = codebook->y3; + if (!s->palette_video) { + 
s->frame.data[1][iu[0] + 1] = codebook->u; + s->frame.data[2][iv[0] + 1] = codebook->v; + } + + codebook = &strip->v4_codebook[*data++]; + s->frame.data[0][iy[2] + 0] = codebook->y0; + s->frame.data[0][iy[2] + 1] = codebook->y1; + s->frame.data[0][iy[3] + 0] = codebook->y2; + s->frame.data[0][iy[3] + 1] = codebook->y3; + if (!s->palette_video) { + s->frame.data[1][iu[1]] = codebook->u; + s->frame.data[2][iv[1]] = codebook->v; + } + + codebook = &strip->v4_codebook[*data++]; + s->frame.data[0][iy[2] + 2] = codebook->y0; + s->frame.data[0][iy[2] + 3] = codebook->y1; + s->frame.data[0][iy[3] + 2] = codebook->y2; + s->frame.data[0][iy[3] + 3] = codebook->y3; + if (!s->palette_video) { + s->frame.data[1][iu[1] + 1] = codebook->u; + s->frame.data[2][iv[1] + 1] = codebook->v; + } + + } + } + + iy[0] += 4; iy[1] += 4; + iy[2] += 4; iy[3] += 4; + iu[0] += 2; iu[1] += 2; + iv[0] += 2; iv[1] += 2; + } + } + + return 0; +} + +static int cinepak_decode_strip (CinepakContext *s, + cvid_strip_t *strip, uint8_t *data, int size) +{ + uint8_t *eod = (data + size); + int chunk_id, chunk_size; + + /* coordinate sanity checks */ + if (strip->x1 >= s->width || strip->x2 > s->width || + strip->y1 >= s->height || strip->y2 > s->height || + strip->x1 >= strip->x2 || strip->y1 >= strip->y2) + return -1; + + while ((data + 4) <= eod) { + chunk_id = BE_16 (&data[0]); + chunk_size = BE_16 (&data[2]) - 4; + if(chunk_size < 0) + return -1; + + data += 4; + chunk_size = ((data + chunk_size) > eod) ? 
(eod - data) : chunk_size; + + switch (chunk_id) { + + case 0x2000: + case 0x2100: + case 0x2400: + case 0x2500: + cinepak_decode_codebook (strip->v4_codebook, chunk_id, + chunk_size, data); + break; + + case 0x2200: + case 0x2300: + case 0x2600: + case 0x2700: + cinepak_decode_codebook (strip->v1_codebook, chunk_id, + chunk_size, data); + break; + + case 0x3000: + case 0x3100: + case 0x3200: + return cinepak_decode_vectors (s, strip, chunk_id, + chunk_size, data); + } + + data += chunk_size; + } + + return -1; +} + +static int cinepak_decode (CinepakContext *s) +{ + uint8_t *eod = (s->data + s->size); + int i, result, strip_size, frame_flags, num_strips; + int y0 = 0; + int encoded_buf_size; + /* if true, Cinepak data is from a Sega FILM/CPK file */ + int sega_film_data = 0; + + if (s->size < 10) + return -1; + + frame_flags = s->data[0]; + num_strips = BE_16 (&s->data[8]); + encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2])); + if (encoded_buf_size != s->size) + sega_film_data = 1; + if (sega_film_data) + s->data += 12; + else + s->data += 10; + + if (num_strips > MAX_STRIPS) + num_strips = MAX_STRIPS; + + for (i=0; i < num_strips; i++) { + if ((s->data + 12) > eod) + return -1; + + s->strips[i].id = BE_16 (s->data); + s->strips[i].y1 = y0; + s->strips[i].x1 = 0; + s->strips[i].y2 = y0 + BE_16 (&s->data[8]); + s->strips[i].x2 = s->avctx->width; + + strip_size = BE_16 (&s->data[2]) - 12; + s->data += 12; + strip_size = ((s->data + strip_size) > eod) ? 
(eod - s->data) : strip_size; + + if ((i > 0) && !(frame_flags & 0x01)) { + memcpy (s->strips[i].v4_codebook, s->strips[i-1].v4_codebook, + sizeof(s->strips[i].v4_codebook)); + memcpy (s->strips[i].v1_codebook, s->strips[i-1].v1_codebook, + sizeof(s->strips[i].v1_codebook)); + } + + result = cinepak_decode_strip (s, &s->strips[i], s->data, strip_size); + + if (result != 0) + return result; + + s->data += strip_size; + y0 = s->strips[i].y2; + } + return 0; +} + +static int cinepak_decode_init(AVCodecContext *avctx) +{ + CinepakContext *s = (CinepakContext *)avctx->priv_data; + + s->avctx = avctx; + s->width = (avctx->width + 3) & ~3; + s->height = (avctx->height + 3) & ~3; + + // check for paletted data + if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) { + s->palette_video = 0; + avctx->pix_fmt = PIX_FMT_YUV420P; + } else { + s->palette_video = 1; + avctx->pix_fmt = PIX_FMT_PAL8; + } + + avctx->has_b_frames = 0; + dsputil_init(&s->dsp, avctx); + + s->frame.data[0] = NULL; + + return 0; +} + +static int cinepak_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + CinepakContext *s = (CinepakContext *)avctx->priv_data; + + s->data = buf; + s->size = buf_size; + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | + FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, &s->frame)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + + cinepak_decode(s); + + if (s->palette_video) { + memcpy (s->frame.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); + if (avctx->palctrl->palette_changed) { + s->frame.palette_has_changed = 1; + avctx->palctrl->palette_changed = 0; + } else + s->frame.palette_has_changed = 0; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + /* report that the buffer was completely consumed */ + return buf_size; +} + +static int cinepak_decode_end(AVCodecContext *avctx) +{ + CinepakContext 
*s = (CinepakContext *)avctx->priv_data; + + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + +AVCodec cinepak_decoder = { + "cinepak", + CODEC_TYPE_VIDEO, + CODEC_ID_CINEPAK, + sizeof(CinepakContext), + cinepak_decode_init, + NULL, + cinepak_decode_end, + cinepak_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/cljr.c b/mpeg4/src/libavcodec/cljr.c new file mode 100644 index 0000000000000000000000000000000000000000..feb0d8bb288cedf1c539df36b32f395ac637f367 --- /dev/null +++ b/mpeg4/src/libavcodec/cljr.c @@ -0,0 +1,156 @@ +/* + * Cirrus Logic AccuPak (CLJR) codec + * Copyright (c) 2003 Alex Beregszaszi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cljr.c + * Cirrus Logic AccuPak codec. 
+ */ + +#include "avcodec.h" +#include "mpegvideo.h" + +typedef struct CLJRContext{ + AVCodecContext *avctx; + AVFrame picture; + int delta[16]; + int offset[4]; + GetBitContext gb; +} CLJRContext; + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + CLJRContext * const a = avctx->priv_data; + AVFrame *picture = data; + AVFrame * const p= (AVFrame*)&a->picture; + int x, y; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->pict_type= I_TYPE; + p->key_frame= 1; + + init_get_bits(&a->gb, buf, buf_size); + + for(y=0; yheight; y++){ + uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ]; + uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ]; + uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ]; + for(x=0; xwidth; x+=4){ + luma[3] = get_bits(&a->gb, 5) << 3; + luma[2] = get_bits(&a->gb, 5) << 3; + luma[1] = get_bits(&a->gb, 5) << 3; + luma[0] = get_bits(&a->gb, 5) << 3; + luma+= 4; + *(cb++) = get_bits(&a->gb, 6) << 2; + *(cr++) = get_bits(&a->gb, 6) << 2; + } + } + + *picture= *(AVFrame*)&a->picture; + *data_size = sizeof(AVPicture); + + emms_c(); + + return buf_size; +} + +#if 0 +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + CLJRContext * const a = avctx->priv_data; + AVFrame *pict = data; + AVFrame * const p= (AVFrame*)&a->picture; + int size; + int mb_x, mb_y; + + *p = *pict; + p->pict_type= I_TYPE; + p->key_frame= 1; + + emms_c(); + + align_put_bits(&a->pb); + while(get_bit_count(&a->pb)&31) + put_bits(&a->pb, 8, 0); + + size= get_bit_count(&a->pb)/32; + + return size*4; +} +#endif + +static void common_init(AVCodecContext *avctx){ + CLJRContext * const a = avctx->priv_data; + + avctx->coded_frame= (AVFrame*)&a->picture; + a->avctx= avctx; +} + +static int decode_init(AVCodecContext 
*avctx){ + + common_init(avctx); + + avctx->pix_fmt= PIX_FMT_YUV411P; + + return 0; +} + +#if 0 +static int encode_init(AVCodecContext *avctx){ + + common_init(avctx); + + return 0; +} +#endif + +AVCodec cljr_decoder = { + "cljr", + CODEC_TYPE_VIDEO, + CODEC_ID_CLJR, + sizeof(CLJRContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1, +}; +#if 0 +#ifdef CONFIG_ENCODERS + +AVCodec cljr_encoder = { + "cljr", + CODEC_TYPE_VIDEO, + CODEC_ID_cljr, + sizeof(CLJRContext), + encode_init, + encode_frame, + //encode_end, +}; + +#endif //CONFIG_ENCODERS +#endif diff --git a/mpeg4/src/libavcodec/cook.c b/mpeg4/src/libavcodec/cook.c new file mode 100644 index 0000000000000000000000000000000000000000..07368ca4762d0d039fabbeea91bf8558f42c7771 --- /dev/null +++ b/mpeg4/src/libavcodec/cook.c @@ -0,0 +1,1307 @@ +/* + * COOK compatible decoder + * Copyright (c) 2003 Sascha Sommer + * Copyright (c) 2005 Benjamin Larsson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cook.c + * Cook compatible decoder. + * This decoder handles RealNetworks, RealAudio G2 data. + * Cook is identified by the codec name cook in RM files. 
+ * + * To use this decoder, a calling application must supply the extradata + * bytes provided from the RM container; 8+ bytes for mono streams and + * 16+ for stereo streams (maybe more). + * + * Codec technicalities (all this assume a buffer length of 1024): + * Cook works with several different techniques to achieve its compression. + * In the timedomain the buffer is divided into 8 pieces and quantized. If + * two neighboring pieces have different quantization index a smooth + * quantization curve is used to get a smooth overlap between the different + * pieces. + * To get to the transformdomain Cook uses a modulated lapped transform. + * The transform domain has 50 subbands with 20 elements each. This + * means only a maximum of 50*20=1000 coefficients are used out of the 1024 + * available. + */ + +#include +#include +#include + +#define ALT_BITSTREAM_READER +#include "avcodec.h" +#include "bitstream.h" +#include "dsputil.h" + +#include "cookdata.h" + +/* the different Cook versions */ +#define MONO_COOK1 0x1000001 +#define MONO_COOK2 0x1000002 +#define JOINT_STEREO 0x1000003 +#define MC_COOK 0x2000000 //multichannel Cook, not supported + +#define SUBBAND_SIZE 20 +//#define COOKDEBUG + +typedef struct { + int size; + int qidx_table1[8]; + int qidx_table2[8]; +} COOKgain; + +typedef struct __attribute__((__packed__)){ + /* codec data start */ + uint32_t cookversion; //in network order, bigendian + uint16_t samples_per_frame; //amount of samples per frame per channel, bigendian + uint16_t subbands; //amount of bands used in the frequency domain, bigendian + /* Mono extradata ends here. */ + uint32_t unused; + uint16_t js_subband_start; //bigendian + uint16_t js_vlc_bits; //bigendian + /* Stereo extradata ends here. 
*/ +} COOKextradata; + + +typedef struct { + GetBitContext gb; + /* stream data */ + int nb_channels; + int joint_stereo; + int bit_rate; + int sample_rate; + int samples_per_channel; + int samples_per_frame; + int subbands; + int log2_numvector_size; + int numvector_size; //1 << log2_numvector_size; + int js_subband_start; + int total_subbands; + int num_vectors; + int bits_per_subpacket; + /* states */ + int random_state; + + /* transform data */ + FFTContext fft_ctx; + FFTSample mlt_tmp[1024] __attribute__((aligned(16))); /* temporary storage for imlt */ + float* mlt_window; + float* mlt_precos; + float* mlt_presin; + float* mlt_postcos; + int fft_size; + int fft_order; + int mlt_size; //modulated lapped transform size + + /* gain buffers */ + COOKgain* gain_now_ptr; + COOKgain* gain_previous_ptr; + COOKgain gain_current; + COOKgain gain_now; + COOKgain gain_previous; + COOKgain gain_channel1[2]; + COOKgain gain_channel2[2]; + + /* VLC data */ + int js_vlc_bits; + VLC envelope_quant_index[13]; + VLC sqvh[7]; //scalar quantization + VLC ccpl; //channel coupling + + /* generatable tables and related variables */ + int gain_size_factor; + float gain_table[23]; + float pow2tab[127]; + float rootpow2tab[127]; + + /* data buffers */ + + uint8_t* decoded_bytes_buffer; + float mono_mdct_output[2048] __attribute__((aligned(16))); + float* previous_buffer_ptr[2]; + float mono_previous_buffer1[1024]; + float mono_previous_buffer2[1024]; + float* decode_buf_ptr[4]; + float* decode_buf_ptr2[2]; + float decode_buffer_1[1024]; + float decode_buffer_2[1024]; + float decode_buffer_3[1024]; + float decode_buffer_4[1024]; +} COOKContext; + +/* debug functions */ + +#ifdef COOKDEBUG +static void dump_float_table(float* table, int size, int delimiter) { + int i=0; + av_log(NULL,AV_LOG_ERROR,"\n[%d]: ",i); + for (i=0 ; ipow2tab[63] = 1.0; + for (i=1 ; i<64 ; i++){ + q->pow2tab[63+i]=(float)((uint64_t)1<pow2tab[63-i]=1.0/(float)((uint64_t)1<rootpow2tab[63] = 1.0; + for (i=1 ; i<64 ; 
i++){ + q->rootpow2tab[63+i]=sqrt((float)((uint64_t)1<rootpow2tab[63-i]=sqrt(1.0/(float)((uint64_t)1<gain_size_factor = q->samples_per_channel/8; + for (i=0 ; i<23 ; i++) { + q->gain_table[i] = pow((double)q->pow2tab[i+52] , + (1.0/(double)q->gain_size_factor)); + } +} + + +static int init_cook_vlc_tables(COOKContext *q) { + int i, result; + + result = 0; + for (i=0 ; i<13 ; i++) { + result &= init_vlc (&q->envelope_quant_index[i], 9, 24, + envelope_quant_index_huffbits[i], 1, 1, + envelope_quant_index_huffcodes[i], 2, 2, 0); + } + av_log(NULL,AV_LOG_DEBUG,"sqvh VLC init\n"); + for (i=0 ; i<7 ; i++) { + result &= init_vlc (&q->sqvh[i], vhvlcsize_tab[i], vhsize_tab[i], + cvh_huffbits[i], 1, 1, + cvh_huffcodes[i], 2, 2, 0); + } + + if (q->nb_channels==2 && q->joint_stereo==1){ + result &= init_vlc (&q->ccpl, 6, (1<js_vlc_bits)-1, + ccpl_huffbits[q->js_vlc_bits-2], 1, 1, + ccpl_huffcodes[q->js_vlc_bits-2], 2, 2, 0); + av_log(NULL,AV_LOG_DEBUG,"Joint-stereo VLC used.\n"); + } + + av_log(NULL,AV_LOG_DEBUG,"VLC tables initialized.\n"); + return result; +} + +static int init_cook_mlt(COOKContext *q) { + int j; + float alpha; + + /* Allocate the buffers, could be replaced with a static [512] + array if needed. */ + q->mlt_size = q->samples_per_channel; + q->mlt_window = av_malloc(sizeof(float)*q->mlt_size); + q->mlt_precos = av_malloc(sizeof(float)*q->mlt_size/2); + q->mlt_presin = av_malloc(sizeof(float)*q->mlt_size/2); + q->mlt_postcos = av_malloc(sizeof(float)*q->mlt_size/2); + + /* Initialize the MLT window: simple sine window. 
*/ + alpha = M_PI / (2.0 * (float)q->mlt_size); + for(j=0 ; jmlt_size ; j++) { + q->mlt_window[j] = sin((j + 512.0/(float)q->mlt_size) * alpha); + } + + /* pre/post twiddle factors */ + for (j=0 ; jmlt_size/2 ; j++){ + q->mlt_precos[j] = cos( ((j+0.25)*M_PI)/q->mlt_size); + q->mlt_presin[j] = sin( ((j+0.25)*M_PI)/q->mlt_size); + q->mlt_postcos[j] = (float)sqrt(2.0/(float)q->mlt_size)*cos( ((float)j*M_PI) /q->mlt_size); //sqrt(2/MLT_size) = scalefactor + } + + /* Initialize the FFT. */ + ff_fft_init(&q->fft_ctx, av_log2(q->mlt_size)-1, 0); + av_log(NULL,AV_LOG_DEBUG,"FFT initialized, order = %d.\n", + av_log2(q->samples_per_channel)-1); + + return (int)(q->mlt_window && q->mlt_precos && q->mlt_presin && q->mlt_postcos); +} + +/*************** init functions end ***********/ + +/** + * Cook indata decoding, every 32 bits are XORed with 0x37c511f2. + * Why? No idea, some checksum/error detection method maybe. + * Nice way to waste CPU cycles. + * + * @param in pointer to 32bit array of indata + * @param bits amount of bits + * @param out pointer to 32bit array of outdata + */ + +static inline void decode_bytes(uint8_t* inbuffer, uint8_t* out, int bytes){ + int i; + uint32_t* buf = (uint32_t*) inbuffer; + uint32_t* obuf = (uint32_t*) out; + /* FIXME: 64 bit platforms would be able to do 64 bits at a time. + * I'm too lazy though, should be something like + * for(i=0 ; ipriv_data; + av_log(NULL,AV_LOG_DEBUG, "Deallocating memory.\n"); + + /* Free allocated memory buffers. */ + av_free(q->mlt_window); + av_free(q->mlt_precos); + av_free(q->mlt_presin); + av_free(q->mlt_postcos); + av_free(q->decoded_bytes_buffer); + + /* Free the transform. */ + ff_fft_end(&q->fft_ctx); + + /* Free the VLC tables. 
*/ + for (i=0 ; i<13 ; i++) { + free_vlc(&q->envelope_quant_index[i]); + } + for (i=0 ; i<7 ; i++) { + free_vlc(&q->sqvh[i]); + } + if(q->nb_channels==2 && q->joint_stereo==1 ){ + free_vlc(&q->ccpl); + } + + av_log(NULL,AV_LOG_DEBUG,"Memory deallocated.\n"); + + return 0; +} + +/** + * Fill the COOKgain structure for the timedomain quantization. + * + * @param q pointer to the COOKContext + * @param gaininfo pointer to the COOKgain + */ + +static void decode_gain_info(GetBitContext *gb, COOKgain* gaininfo) { + int i; + + while (get_bits1(gb)) {} + + gaininfo->size = get_bits_count(gb) - 1; //amount of elements*2 to update + + if (get_bits_count(gb) - 1 <= 0) return; + + for (i=0 ; isize ; i++){ + gaininfo->qidx_table1[i] = get_bits(gb,3); + if (get_bits1(gb)) { + gaininfo->qidx_table2[i] = get_bits(gb,4) - 7; //convert to signed + } else { + gaininfo->qidx_table2[i] = -1; + } + } +} + +/** + * Create the quant index table needed for the envelope. + * + * @param q pointer to the COOKContext + * @param quant_index_table pointer to the array + */ + +static void decode_envelope(COOKContext *q, int* quant_index_table) { + int i,j, vlc_index; + int bitbias; + + bitbias = get_bits_count(&q->gb); + quant_index_table[0]= get_bits(&q->gb,6) - 6; //This is used later in categorize + + for (i=1 ; i < q->total_subbands ; i++){ + vlc_index=i; + if (i >= q->js_subband_start * 2) { + vlc_index-=q->js_subband_start; + } else { + vlc_index/=2; + if(vlc_index < 1) vlc_index = 1; + } + if (vlc_index>13) vlc_index = 13; //the VLC tables >13 are identical to No. 13 + + j = get_vlc2(&q->gb, q->envelope_quant_index[vlc_index-1].table, + q->envelope_quant_index[vlc_index-1].bits,2); + quant_index_table[i] = quant_index_table[i-1] + j - 12; //differential encoding + } +} + +/** + * Create the quant value table. 
+ * + * @param q pointer to the COOKContext + * @param quant_value_table pointer to the array + */ + +static void inline dequant_envelope(COOKContext *q, int* quant_index_table, + float* quant_value_table){ + + int i; + for(i=0 ; i < q->total_subbands ; i++){ + quant_value_table[i] = q->rootpow2tab[quant_index_table[i]+63]; + } +} + +/** + * Calculate the category and category_index vector. + * + * @param q pointer to the COOKContext + * @param quant_index_table pointer to the array + * @param category pointer to the category array + * @param category_index pointer to the category_index array + */ + +static void categorize(COOKContext *q, int* quant_index_table, + int* category, int* category_index){ + int exp_idx, bias, tmpbias, bits_left, num_bits, index, v, i, j; + int exp_index2[102]; + int exp_index1[102]; + + int tmp_categorize_array1[128]; + int tmp_categorize_array1_idx=0; + int tmp_categorize_array2[128]; + int tmp_categorize_array2_idx=0; + int category_index_size=0; + + bits_left = q->bits_per_subpacket - get_bits_count(&q->gb); + + if(bits_left > q->samples_per_channel) { + bits_left = q->samples_per_channel + + ((bits_left - q->samples_per_channel)*5)/8; + //av_log(NULL, AV_LOG_ERROR, "bits_left = %d\n",bits_left); + } + + memset(&exp_index1,0,102*sizeof(int)); + memset(&exp_index2,0,102*sizeof(int)); + memset(&tmp_categorize_array1,0,128*sizeof(int)); + memset(&tmp_categorize_array2,0,128*sizeof(int)); + + bias=-32; + + /* Estimate bias. */ + for (i=32 ; i>0 ; i=i/2){ + num_bits = 0; + index = 0; + for (j=q->total_subbands ; j>0 ; j--){ + exp_idx = (i - quant_index_table[index] + bias) / 2; + if (exp_idx<0){ + exp_idx=0; + } else if(exp_idx >7) { + exp_idx=7; + } + index++; + num_bits+=expbits_tab[exp_idx]; + } + if(num_bits >= bits_left - 32){ + bias+=i; + } + } + + /* Calculate total number of bits. 
*/ + num_bits=0; + for (i=0 ; itotal_subbands ; i++) { + exp_idx = (bias - quant_index_table[i]) / 2; + if (exp_idx<0) { + exp_idx=0; + } else if(exp_idx >7) { + exp_idx=7; + } + num_bits += expbits_tab[exp_idx]; + exp_index1[i] = exp_idx; + exp_index2[i] = exp_idx; + } + tmpbias = bias = num_bits; + + for (j = 1 ; j < q->numvector_size ; j++) { + if (tmpbias + bias > 2*bits_left) { /* ---> */ + int max = -999999; + index=-1; + for (i=0 ; itotal_subbands ; i++){ + if (exp_index1[i] < 7) { + v = (-2*exp_index1[i]) - quant_index_table[i] - 32; + if ( v >= max) { + max = v; + index = i; + } + } + } + if(index==-1)break; + tmp_categorize_array1[tmp_categorize_array1_idx++] = index; + tmpbias -= expbits_tab[exp_index1[index]] - + expbits_tab[exp_index1[index]+1]; + ++exp_index1[index]; + } else { /* <--- */ + int min = 999999; + index=-1; + for (i=0 ; itotal_subbands ; i++){ + if(exp_index2[i] > 0){ + v = (-2*exp_index2[i])-quant_index_table[i]; + if ( v < min) { + min = v; + index = i; + } + } + } + if(index == -1)break; + tmp_categorize_array2[tmp_categorize_array2_idx++] = index; + tmpbias -= expbits_tab[exp_index2[index]] - + expbits_tab[exp_index2[index]-1]; + --exp_index2[index]; + } + } + + for(i=0 ; itotal_subbands ; i++) + category[i] = exp_index2[i]; + + /* Concatenate the two arrays. */ + for(i=tmp_categorize_array2_idx-1 ; i >= 0; i--) + category_index[category_index_size++] = tmp_categorize_array2[i]; + + for(i=0;inumvector_size;i++) + category_index[i]=0; + +} + + +/** + * Expand the category vector. 
+ * + * @param q pointer to the COOKContext + * @param category pointer to the category array + * @param category_index pointer to the category_index array + */ + +static void inline expand_category(COOKContext *q, int* category, + int* category_index){ + int i; + for(i=0 ; inum_vectors ; i++){ + ++category[category_index[i]]; + } +} + +/** + * The real requantization of the mltcoefs + * + * @param q pointer to the COOKContext + * @param index index + * @param band current subband + * @param quant_value_table pointer to the array + * @param subband_coef_index array of indexes to quant_centroid_tab + * @param subband_coef_noise use random noise instead of predetermined value + * @param mlt_buffer pointer to the mlt buffer + */ + + +static void scalar_dequant(COOKContext *q, int index, int band, + float* quant_value_table, int* subband_coef_index, + int* subband_coef_noise, float* mlt_buffer){ + int i; + float f1; + + for(i=0 ; irandom_state = q->random_state * 214013 + 2531011; //typical RNG numbers + f1 = randsign[(q->random_state/0x1000000)&1] * dither_tab[index]; //>>31 + } + mlt_buffer[band*20+ i] = f1 * quant_value_table[band]; + } +} +/** + * Unpack the subband_coef_index and subband_coef_noise vectors. 
+ * + * @param q pointer to the COOKContext + * @param category pointer to the category array + * @param subband_coef_index array of indexes to quant_centroid_tab + * @param subband_coef_noise use random noise instead of predetermined value + */ + +static int unpack_SQVH(COOKContext *q, int category, int* subband_coef_index, + int* subband_coef_noise) { + int i,j; + int vlc, vd ,tmp, result; + int ub; + int cb; + + vd = vd_tab[category]; + result = 0; + for(i=0 ; igb); + vlc = get_vlc2(&q->gb, q->sqvh[category].table, q->sqvh[category].bits, 3); + cb = get_bits_count(&q->gb); + if (q->bits_per_subpacket < get_bits_count(&q->gb)){ + vlc = 0; + result = 1; + } + for(j=vd-1 ; j>=0 ; j--){ + tmp = (vlc * invradix_tab[category])/0x100000; + subband_coef_index[vd*i+j] = vlc - tmp * (kmax_tab[category]+1); + vlc = tmp; + } + for(j=0 ; jgb) < q->bits_per_subpacket){ + subband_coef_noise[i*vd+j] = get_bits1(&q->gb); + } else { + result=1; + subband_coef_noise[i*vd+j]=0; + } + } else { + subband_coef_noise[i*vd+j]=0; + } + } + } + return result; +} + + +/** + * Fill the mlt_buffer with mlt coefficients. + * + * @param q pointer to the COOKContext + * @param category pointer to the category array + * @param quant_value_table pointer to the array + * @param mlt_buffer pointer to mlt coefficients + */ + + +static void decode_vectors(COOKContext* q, int* category, + float* quant_value_table, float* mlt_buffer){ + /* A zero in this table means that the subband coefficient is + random noise coded. */ + int subband_coef_noise[SUBBAND_SIZE]; + /* A zero in this table means that the subband coefficient is a + positive multiplicator. 
*/ + int subband_coef_index[SUBBAND_SIZE]; + int band, j; + int index=0; + + for(band=0 ; bandtotal_subbands ; band++){ + index = category[band]; + if(category[band] < 7){ + if(unpack_SQVH(q, category[band], subband_coef_index, subband_coef_noise)){ + index=7; + for(j=0 ; jtotal_subbands ; j++) category[band+j]=7; + } + } + if(index==7) { + memset(subband_coef_index, 0, sizeof(subband_coef_index)); + memset(subband_coef_noise, 0, sizeof(subband_coef_noise)); + } + scalar_dequant(q, index, band, quant_value_table, subband_coef_index, + subband_coef_noise, mlt_buffer); + } + + if(q->total_subbands*SUBBAND_SIZE >= q->samples_per_channel){ + return; + } +} + + +/** + * function for decoding mono data + * + * @param q pointer to the COOKContext + * @param mlt_buffer1 pointer to left channel mlt coefficients + * @param mlt_buffer2 pointer to right channel mlt coefficients + */ + +static void mono_decode(COOKContext *q, float* mlt_buffer) { + + int category_index[128]; + float quant_value_table[102]; + int quant_index_table[102]; + int category[128]; + + memset(&category, 0, 128*sizeof(int)); + memset(&quant_value_table, 0, 102*sizeof(int)); + memset(&category_index, 0, 128*sizeof(int)); + + decode_envelope(q, quant_index_table); + q->num_vectors = get_bits(&q->gb,q->log2_numvector_size); + dequant_envelope(q, quant_index_table, quant_value_table); + categorize(q, quant_index_table, category, category_index); + expand_category(q, category, category_index); + decode_vectors(q, category, quant_value_table, mlt_buffer); +} + + +/** + * The modulated lapped transform, this takes transform coefficients + * and transforms them into timedomain samples. This is done through + * an FFT-based algorithm with pre- and postrotation steps. + * A window and reorder step is also included. 
+ * + * @param q pointer to the COOKContext + * @param inbuffer pointer to the mltcoefficients + * @param outbuffer pointer to the timedomain buffer + * @param mlt_tmp pointer to temporary storage space + */ + +static void cook_imlt(COOKContext *q, float* inbuffer, float* outbuffer, + float* mlt_tmp){ + int i; + + /* prerotation */ + for(i=0 ; imlt_size ; i+=2){ + outbuffer[i] = (q->mlt_presin[i/2] * inbuffer[q->mlt_size-1-i]) + + (q->mlt_precos[i/2] * inbuffer[i]); + outbuffer[i+1] = (q->mlt_precos[i/2] * inbuffer[q->mlt_size-1-i]) - + (q->mlt_presin[i/2] * inbuffer[i]); + } + + /* FFT */ + ff_fft_permute(&q->fft_ctx, (FFTComplex *) outbuffer); + ff_fft_calc (&q->fft_ctx, (FFTComplex *) outbuffer); + + /* postrotation */ + for(i=0 ; imlt_size ; i+=2){ + mlt_tmp[i] = (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i+1]) + + (q->mlt_postcos[i/2] * outbuffer[i]); + mlt_tmp[q->mlt_size-1-i] = (q->mlt_postcos[(q->mlt_size-1-i)/2] * outbuffer[i]) - + (q->mlt_postcos[i/2] * outbuffer[i+1]); + } + + /* window and reorder */ + for(i=0 ; imlt_size/2 ; i++){ + outbuffer[i] = mlt_tmp[q->mlt_size/2-1-i] * q->mlt_window[i]; + outbuffer[q->mlt_size-1-i]= mlt_tmp[q->mlt_size/2-1-i] * + q->mlt_window[q->mlt_size-1-i]; + outbuffer[q->mlt_size+i]= mlt_tmp[q->mlt_size/2+i] * + q->mlt_window[q->mlt_size-1-i]; + outbuffer[2*q->mlt_size-1-i]= -(mlt_tmp[q->mlt_size/2+i] * + q->mlt_window[i]); + } +} + + +/** + * the actual requantization of the timedomain samples + * + * @param q pointer to the COOKContext + * @param buffer pointer to the timedomain buffer + * @param gain_index index for the block multiplier + * @param gain_index_next index for the next block multiplier + */ + +static void interpolate(COOKContext *q, float* buffer, + int gain_index, int gain_index_next){ + int i; + float fc1, fc2; + fc1 = q->pow2tab[gain_index+63]; + + if(gain_index == gain_index_next){ //static gain + for(i=0 ; igain_size_factor ; i++){ + buffer[i]*=fc1; + } + return; + } else { //smooth gain + fc2 = 
q->gain_table[11 + (gain_index_next-gain_index)]; + for(i=0 ; igain_size_factor ; i++){ + buffer[i]*=fc1; + fc1*=fc2; + } + return; + } +} + +/** + * timedomain requantization of the timedomain samples + * + * @param q pointer to the COOKContext + * @param buffer pointer to the timedomain buffer + * @param gain_now current gain structure + * @param gain_previous previous gain structure + */ + +static void gain_window(COOKContext *q, float* buffer, COOKgain* gain_now, + COOKgain* gain_previous){ + int i, index; + int gain_index[9]; + int tmp_gain_index; + + gain_index[8]=0; + index = gain_previous->size; + for (i=7 ; i>=0 ; i--) { + if(index && gain_previous->qidx_table1[index-1]==i) { + gain_index[i] = gain_previous->qidx_table2[index-1]; + index--; + } else { + gain_index[i]=gain_index[i+1]; + } + } + /* This is applied to the to be previous data buffer. */ + for(i=0;i<8;i++){ + interpolate(q, &buffer[q->samples_per_channel+q->gain_size_factor*i], + gain_index[i], gain_index[i+1]); + } + + tmp_gain_index = gain_index[0]; + index = gain_now->size; + for (i=7 ; i>=0 ; i--) { + if(index && gain_now->qidx_table1[index-1]==i) { + gain_index[i]= gain_now->qidx_table2[index-1]; + index--; + } else { + gain_index[i]=gain_index[i+1]; + } + } + + /* This is applied to the to be current block. 
*/ + for(i=0;i<8;i++){ + interpolate(q, &buffer[i*q->gain_size_factor], + tmp_gain_index+gain_index[i], + tmp_gain_index+gain_index[i+1]); + } +} + + +/** + * mlt overlapping and buffer management + * + * @param q pointer to the COOKContext + * @param buffer pointer to the timedomain buffer + * @param gain_now current gain structure + * @param gain_previous previous gain structure + * @param previous_buffer pointer to the previous buffer to be used for overlapping + * + */ + +static void gain_compensate(COOKContext *q, float* buffer, COOKgain* gain_now, + COOKgain* gain_previous, float* previous_buffer) { + int i; + if((gain_now->size || gain_previous->size)) { + gain_window(q, buffer, gain_now, gain_previous); + } + + /* Overlap with the previous block. */ + for(i=0 ; isamples_per_channel ; i++) buffer[i]+=previous_buffer[i]; + + /* Save away the current to be previous block. */ + memcpy(previous_buffer, buffer+q->samples_per_channel, + sizeof(float)*q->samples_per_channel); +} + + +/** + * function for getting the jointstereo coupling information + * + * @param q pointer to the COOKContext + * @param decouple_tab decoupling array + * + */ + +static void decouple_info(COOKContext *q, int* decouple_tab){ + int length, i; + + if(get_bits1(&q->gb)) { + if(cplband[q->js_subband_start] > cplband[q->subbands-1]) return; + + length = cplband[q->subbands-1] - cplband[q->js_subband_start] + 1; + for (i=0 ; ijs_subband_start] + i] = get_vlc2(&q->gb, q->ccpl.table, q->ccpl.bits, 2); + } + return; + } + + if(cplband[q->js_subband_start] > cplband[q->subbands-1]) return; + + length = cplband[q->subbands-1] - cplband[q->js_subband_start] + 1; + for (i=0 ; ijs_subband_start] + i] = get_bits(&q->gb, q->js_vlc_bits); + } + return; +} + + +/** + * function for decoding joint stereo data + * + * @param q pointer to the COOKContext + * @param mlt_buffer1 pointer to left channel mlt coefficients + * @param mlt_buffer2 pointer to right channel mlt coefficients + */ + +static void 
joint_decode(COOKContext *q, float* mlt_buffer1, + float* mlt_buffer2) { + int i,j; + int decouple_tab[SUBBAND_SIZE]; + float decode_buffer[1060]; + int idx, cpl_tmp,tmp_idx; + float f1,f2; + float* cplscale; + + memset(decouple_tab, 0, sizeof(decouple_tab)); + memset(decode_buffer, 0, sizeof(decode_buffer)); + + /* Make sure the buffers are zeroed out. */ + memset(mlt_buffer1,0, 1024*sizeof(float)); + memset(mlt_buffer2,0, 1024*sizeof(float)); + decouple_info(q, decouple_tab); + mono_decode(q, decode_buffer); + + /* The two channels are stored interleaved in decode_buffer. */ + for (i=0 ; ijs_subband_start ; i++) { + for (j=0 ; jjs_vlc_bits) - 1; + for (i=q->js_subband_start ; isubbands ; i++) { + cpl_tmp = cplband[i]; + idx -=decouple_tab[cpl_tmp]; + cplscale = (float*)cplscales[q->js_vlc_bits-2]; //choose decoupler table + f1 = cplscale[decouple_tab[cpl_tmp]]; + f2 = cplscale[idx-1]; + for (j=0 ; jjs_subband_start + i)*20)+j; + mlt_buffer1[20*i + j] = f1 * decode_buffer[tmp_idx]; + mlt_buffer2[20*i + j] = f2 * decode_buffer[tmp_idx]; + } + idx = (1 << q->js_vlc_bits) - 1; + } +} + +/** + * Cook subpacket decoding. This function returns one decoded subpacket, + * usually 1024 samples per channel. + * + * @param q pointer to the COOKContext + * @param inbuffer pointer to the inbuffer + * @param sub_packet_size subpacket size + * @param outbuffer pointer to the outbuffer + */ + + +static int decode_subpacket(COOKContext *q, uint8_t *inbuffer, + int sub_packet_size, int16_t *outbuffer) { + int i,j; + int value; + float* tmp_ptr; + + /* packet dump */ +// for (i=0 ; idecoded_bytes_buffer, sub_packet_size); + init_get_bits(&q->gb, q->decoded_bytes_buffer, sub_packet_size*8); + decode_gain_info(&q->gb, &q->gain_current); + + if(q->nb_channels==2 && q->joint_stereo==1){ + joint_decode(q, q->decode_buf_ptr[0], q->decode_buf_ptr[2]); + + /* Swap buffer pointers. 
*/ + tmp_ptr = q->decode_buf_ptr[1]; + q->decode_buf_ptr[1] = q->decode_buf_ptr[0]; + q->decode_buf_ptr[0] = tmp_ptr; + tmp_ptr = q->decode_buf_ptr[3]; + q->decode_buf_ptr[3] = q->decode_buf_ptr[2]; + q->decode_buf_ptr[2] = tmp_ptr; + + /* FIXME: Rethink the gainbuffer handling, maybe a rename? + now/previous swap */ + q->gain_now_ptr = &q->gain_now; + q->gain_previous_ptr = &q->gain_previous; + for (i=0 ; inb_channels ; i++){ + + cook_imlt(q, q->decode_buf_ptr[i*2], q->mono_mdct_output, q->mlt_tmp); + gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr, + q->gain_previous_ptr, q->previous_buffer_ptr[0]); + + /* Swap out the previous buffer. */ + tmp_ptr = q->previous_buffer_ptr[0]; + q->previous_buffer_ptr[0] = q->previous_buffer_ptr[1]; + q->previous_buffer_ptr[1] = tmp_ptr; + + /* Clip and convert the floats to 16 bits. */ + for (j=0 ; jsamples_per_frame ; j++){ + value = lrintf(q->mono_mdct_output[j]); + if(value < -32768) value = -32768; + else if(value > 32767) value = 32767; + outbuffer[2*j+i] = value; + } + } + + memcpy(&q->gain_now, &q->gain_previous, sizeof(COOKgain)); + memcpy(&q->gain_previous, &q->gain_current, sizeof(COOKgain)); + + } else if (q->nb_channels==2 && q->joint_stereo==0) { + /* channel 0 */ + mono_decode(q, q->decode_buf_ptr2[0]); + + tmp_ptr = q->decode_buf_ptr2[0]; + q->decode_buf_ptr2[0] = q->decode_buf_ptr2[1]; + q->decode_buf_ptr2[1] = tmp_ptr; + + memcpy(&q->gain_channel1[0], &q->gain_current ,sizeof(COOKgain)); + q->gain_now_ptr = &q->gain_channel1[0]; + q->gain_previous_ptr = &q->gain_channel1[1]; + + cook_imlt(q, q->decode_buf_ptr2[0], q->mono_mdct_output,q->mlt_tmp); + gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr, + q->gain_previous_ptr, q->mono_previous_buffer1); + + memcpy(&q->gain_channel1[1], &q->gain_channel1[0],sizeof(COOKgain)); + + + for (j=0 ; jsamples_per_frame ; j++){ + value = lrintf(q->mono_mdct_output[j]); + if(value < -32768) value = -32768; + else if(value > 32767) value = 32767; + outbuffer[2*j+1] 
= value; + } + + /* channel 1 */ + //av_log(NULL,AV_LOG_ERROR,"bits = %d\n",get_bits_count(&q->gb)); + init_get_bits(&q->gb, q->decoded_bytes_buffer, sub_packet_size*8+q->bits_per_subpacket); + + q->gain_now_ptr = &q->gain_channel2[0]; + q->gain_previous_ptr = &q->gain_channel2[1]; + + decode_gain_info(&q->gb, &q->gain_channel2[0]); + mono_decode(q, q->decode_buf_ptr[0]); + + tmp_ptr = q->decode_buf_ptr[0]; + q->decode_buf_ptr[0] = q->decode_buf_ptr[1]; + q->decode_buf_ptr[1] = tmp_ptr; + + cook_imlt(q, q->decode_buf_ptr[0], q->mono_mdct_output,q->mlt_tmp); + gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr, + q->gain_previous_ptr, q->mono_previous_buffer2); + + /* Swap out the previous buffer. */ + tmp_ptr = q->previous_buffer_ptr[0]; + q->previous_buffer_ptr[0] = q->previous_buffer_ptr[1]; + q->previous_buffer_ptr[1] = tmp_ptr; + + memcpy(&q->gain_channel2[1], &q->gain_channel2[0] ,sizeof(COOKgain)); + + for (j=0 ; jsamples_per_frame ; j++){ + value = lrintf(q->mono_mdct_output[j]); + if(value < -32768) value = -32768; + else if(value > 32767) value = 32767; + outbuffer[2*j] = value; + } + + } else { + mono_decode(q, q->decode_buf_ptr[0]); + + /* Swap buffer pointers. */ + tmp_ptr = q->decode_buf_ptr[1]; + q->decode_buf_ptr[1] = q->decode_buf_ptr[0]; + q->decode_buf_ptr[0] = tmp_ptr; + + /* FIXME: Rethink the gainbuffer handling, maybe a rename? 
+ now/previous swap */ + q->gain_now_ptr = &q->gain_now; + q->gain_previous_ptr = &q->gain_previous; + + cook_imlt(q, q->decode_buf_ptr[0], q->mono_mdct_output,q->mlt_tmp); + gain_compensate(q, q->mono_mdct_output, q->gain_now_ptr, + q->gain_previous_ptr, q->mono_previous_buffer1); + + /* Clip and convert the floats to 16 bits */ + for (j=0 ; jsamples_per_frame ; j++){ + value = lrintf(q->mono_mdct_output[j]); + if(value < -32768) value = -32768; + else if(value > 32767) value = 32767; + outbuffer[j] = value; + } + memcpy(&q->gain_now, &q->gain_previous, sizeof(COOKgain)); + memcpy(&q->gain_previous, &q->gain_current, sizeof(COOKgain)); + } + return q->samples_per_frame * sizeof(int16_t); +} + + +/** + * Cook frame decoding + * + * @param avctx pointer to the AVCodecContext + */ + +static int cook_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) { + COOKContext *q = avctx->priv_data; + + if (buf_size < avctx->block_align) + return buf_size; + + *data_size = decode_subpacket(q, buf, avctx->block_align, data); + + return avctx->block_align; +} + +#ifdef COOKDEBUG +static void dump_cook_context(COOKContext *q, COOKextradata *e) +{ + //int i=0; +#define PRINT(a,b) av_log(NULL,AV_LOG_ERROR," %s = %d\n", a, b); + av_log(NULL,AV_LOG_ERROR,"COOKextradata\n"); + av_log(NULL,AV_LOG_ERROR,"cookversion=%x\n",e->cookversion); + if (e->cookversion > MONO_COOK2) { + PRINT("js_subband_start",e->js_subband_start); + PRINT("js_vlc_bits",e->js_vlc_bits); + } + av_log(NULL,AV_LOG_ERROR,"COOKContext\n"); + PRINT("nb_channels",q->nb_channels); + PRINT("bit_rate",q->bit_rate); + PRINT("sample_rate",q->sample_rate); + PRINT("samples_per_channel",q->samples_per_channel); + PRINT("samples_per_frame",q->samples_per_frame); + PRINT("subbands",q->subbands); + PRINT("random_state",q->random_state); + PRINT("mlt_size",q->mlt_size); + PRINT("js_subband_start",q->js_subband_start); + PRINT("log2_numvector_size",q->log2_numvector_size); + 
PRINT("numvector_size",q->numvector_size); + PRINT("total_subbands",q->total_subbands); +} +#endif + +/** + * Cook initialization + * + * @param avctx pointer to the AVCodecContext + */ + +static int cook_decode_init(AVCodecContext *avctx) +{ + COOKextradata *e = avctx->extradata; + COOKContext *q = avctx->priv_data; + + /* Take care of the codec specific extradata. */ + if (avctx->extradata_size <= 0) { + av_log(NULL,AV_LOG_ERROR,"Necessary extradata missing!\n"); + return -1; + } else { + /* 8 for mono, 16 for stereo, ? for multichannel + Swap to right endianness so we don't need to care later on. */ + av_log(NULL,AV_LOG_DEBUG,"codecdata_length=%d\n",avctx->extradata_size); + if (avctx->extradata_size >= 8){ + e->cookversion = be2me_32(e->cookversion); + e->samples_per_frame = be2me_16(e->samples_per_frame); + e->subbands = be2me_16(e->subbands); + } + if (avctx->extradata_size >= 16){ + e->js_subband_start = be2me_16(e->js_subband_start); + e->js_vlc_bits = be2me_16(e->js_vlc_bits); + } + } + + /* Take data from the AVCodecContext (RM container). */ + q->sample_rate = avctx->sample_rate; + q->nb_channels = avctx->channels; + q->bit_rate = avctx->bit_rate; + + /* Initialize state. */ + q->random_state = 1; + + /* Initialize extradata related variables. */ + q->samples_per_channel = e->samples_per_frame / q->nb_channels; + q->samples_per_frame = e->samples_per_frame; + q->subbands = e->subbands; + q->bits_per_subpacket = avctx->block_align * 8; + + /* Initialize default data states. 
*/ + q->js_subband_start = 0; + q->log2_numvector_size = 5; + q->total_subbands = q->subbands; + + /* Initialize version-dependent variables */ + av_log(NULL,AV_LOG_DEBUG,"e->cookversion=%x\n",e->cookversion); + switch (e->cookversion) { + case MONO_COOK1: + if (q->nb_channels != 1) { + av_log(NULL,AV_LOG_ERROR,"Container channels != 1, report sample!\n"); + return -1; + } + av_log(NULL,AV_LOG_DEBUG,"MONO_COOK1\n"); + break; + case MONO_COOK2: + if (q->nb_channels != 1) { + q->joint_stereo = 0; + q->bits_per_subpacket = q->bits_per_subpacket/2; + } + av_log(NULL,AV_LOG_DEBUG,"MONO_COOK2\n"); + break; + case JOINT_STEREO: + if (q->nb_channels != 2) { + av_log(NULL,AV_LOG_ERROR,"Container channels != 2, report sample!\n"); + return -1; + } + av_log(NULL,AV_LOG_DEBUG,"JOINT_STEREO\n"); + if (avctx->extradata_size >= 16){ + q->total_subbands = q->subbands + e->js_subband_start; + q->js_subband_start = e->js_subband_start; + q->joint_stereo = 1; + q->js_vlc_bits = e->js_vlc_bits; + } + if (q->samples_per_channel > 256) { + q->log2_numvector_size = 6; + } + if (q->samples_per_channel > 512) { + q->log2_numvector_size = 7; + } + break; + case MC_COOK: + av_log(NULL,AV_LOG_ERROR,"MC_COOK not supported!\n"); + return -1; + break; + default: + av_log(NULL,AV_LOG_ERROR,"Unknown Cook version, report sample!\n"); + return -1; + break; + } + + /* Initialize variable relations */ + q->mlt_size = q->samples_per_channel; + q->numvector_size = (1 << q->log2_numvector_size); + + /* Generate tables */ + init_rootpow2table(q); + init_pow2table(q); + init_gain_table(q); + + if (init_cook_vlc_tables(q) != 0) + return -1; + + /* Pad the databuffer with FF_INPUT_BUFFER_PADDING_SIZE, + this is for the bitstreamreader. 
*/ + if ((q->decoded_bytes_buffer = av_mallocz((avctx->block_align+(4-avctx->block_align%4) + FF_INPUT_BUFFER_PADDING_SIZE)*sizeof(uint8_t))) == NULL) + return -1; + + q->decode_buf_ptr[0] = q->decode_buffer_1; + q->decode_buf_ptr[1] = q->decode_buffer_2; + q->decode_buf_ptr[2] = q->decode_buffer_3; + q->decode_buf_ptr[3] = q->decode_buffer_4; + + q->decode_buf_ptr2[0] = q->decode_buffer_3; + q->decode_buf_ptr2[1] = q->decode_buffer_4; + + q->previous_buffer_ptr[0] = q->mono_previous_buffer1; + q->previous_buffer_ptr[1] = q->mono_previous_buffer2; + + /* Initialize transform. */ + if ( init_cook_mlt(q) == 0 ) + return -1; + + /* Try to catch some obviously faulty streams, othervise it might be exploitable */ + if (q->total_subbands > 53) { + av_log(NULL,AV_LOG_ERROR,"total_subbands > 53, report sample!\n"); + return -1; + } + if (q->subbands > 50) { + av_log(NULL,AV_LOG_ERROR,"subbands > 50, report sample!\n"); + return -1; + } + if ((q->samples_per_channel == 256) || (q->samples_per_channel == 512) || (q->samples_per_channel == 1024)) { + } else { + av_log(NULL,AV_LOG_ERROR,"unknown amount of samples_per_channel = %d, report sample!\n",q->samples_per_channel); + return -1; + } + +#ifdef COOKDEBUG + dump_cook_context(q,e); +#endif + return 0; +} + + +AVCodec cook_decoder = +{ + .name = "cook", + .type = CODEC_TYPE_AUDIO, + .id = CODEC_ID_COOK, + .priv_data_size = sizeof(COOKContext), + .init = cook_decode_init, + .close = cook_decode_close, + .decode = cook_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/cookdata.h b/mpeg4/src/libavcodec/cookdata.h new file mode 100644 index 0000000000000000000000000000000000000000..1247d9d91f16f5da5eaf445f2350f0aec6c554d5 --- /dev/null +++ b/mpeg4/src/libavcodec/cookdata.h @@ -0,0 +1,557 @@ +/* + * COOK compatible decoder data + * Copyright (c) 2003 Sascha Sommer + * Copyright (c) 2005 Benjamin Larsson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General 
Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file cookdata.h + * Cook AKA RealAudio G2 compatible decoderdata + */ + +/* various data tables */ + +static const int expbits_tab[8] = { + 52,47,43,37,29,22,16,0, +}; + +static const float dither_tab[8] = { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.176777, 0.25, 0.707107, +}; + +static const float randsign[2] = {1.0, -1.0}; + +static const float quant_centroid_tab[7][14] = { + { 0.000, 0.392, 0.761, 1.120, 1.477, 1.832, 2.183, 2.541, 2.893, 3.245, 3.598, 3.942, 4.288, 4.724 }, + { 0.000, 0.544, 1.060, 1.563, 2.068, 2.571, 3.072, 3.562, 4.070, 4.620, 0.000, 0.000, 0.000, 0.000 }, + { 0.000, 0.746, 1.464, 2.180, 2.882, 3.584, 4.316, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, + { 0.000, 1.006, 2.000, 2.993, 3.985, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, + { 0.000, 1.321, 2.703, 3.983, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, + { 0.000, 1.657, 3.491, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 }, + { 0.000, 1.964, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000 } +}; + +static const int invradix_tab[7] = { + 74899, 104858, 149797, 209716, 262144, 349526, 524288, +}; + +static const int kmax_tab[7] = { + 13, 9, 6, 4, 3, 2, 1, +}; + +static const int vd_tab[7] = { + 2, 2, 2, 4, 4, 5, 5, +}; + +static const int 
vpr_tab[7] = { + 10, 10, 10, 5, 5, 4, 4, +}; + + + +/* VLC data */ + +static const int vhsize_tab[7] = { + 191, 97, 48, 607, 246, 230, 32, +}; + +static const int vhvlcsize_tab[7] = { + 8, 7, 7, 10, 9, 9, 6, +}; + +static const uint8_t envelope_quant_index_huffbits[13][24] = { + { 4, 6, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 5, 7, 8, 9, 11, 11, 12, 12, 12, 12 }, + { 10, 8, 6, 5, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 7, 9, 11, 12, 13, 15, 15, 15, 16, 16 }, + { 12, 10, 8, 6, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4, 4, 5, 5, 7, 9, 11, 13, 14, 14 }, + { 13, 10, 9, 9, 7, 7, 5, 5, 4, 3, 3, 3, 3, 3, 4, 4, 4, 5, 7, 9, 11, 13, 13, 13 }, + { 12, 13, 10, 8, 6, 6, 5, 5, 4, 4, 3, 3, 3, 3, 3, 4, 5, 5, 6, 7, 9, 11, 14, 14 }, + { 12, 11, 9, 8, 8, 7, 5, 4, 4, 3, 3, 3, 3, 3, 4, 4, 5, 5, 7, 8, 10, 13, 14, 14 }, + { 15, 16, 15, 12, 10, 8, 6, 5, 4, 3, 3, 3, 2, 3, 4, 5, 5, 7, 9, 11, 13, 16, 16, 16 }, + { 14, 14, 11, 10, 9, 7, 7, 5, 5, 4, 3, 3, 2, 3, 3, 4, 5, 7, 9, 9, 12, 14, 15, 15 }, + { 9, 9, 9, 8, 7, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 13 }, + { 14, 12, 10, 8, 6, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 6, 8, 8, 9, 11, 14, 14, 14 }, + { 13, 10, 9, 8, 6, 6, 5, 4, 4, 4, 3, 3, 2, 3, 4, 5, 6, 8, 9, 9, 11, 12, 14, 14 }, + { 16, 13, 12, 11, 9, 6, 5, 5, 4, 4, 4, 3, 2, 3, 3, 4, 5, 7, 8, 10, 14, 16, 16, 16 }, + { 13, 14, 14, 14, 10, 8, 7, 7, 5, 4, 3, 3, 2, 3, 3, 4, 5, 5, 7, 9, 11, 14, 14, 14 }, +}; + +static const uint16_t envelope_quant_index_huffcodes[13][24] = { + {0x0006, 0x003e, 0x001c, 0x001d, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x0000, 0x0001, + 0x0002, 0x000d, 0x001e, 0x007e, 0x00fe, 0x01fe, 0x07fc, 0x07fd, 0x0ffc, 0x0ffd, 0x0ffe, 0x0fff}, + {0x03fe, 0x00fe, 0x003e, 0x001c, 0x001d, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x000d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x0ffe, 0x1ffe, 0x7ffc, 0x7ffd, 0x7ffe, 0xfffe, 0xffff}, + {0x0ffe, 0x03fe, 0x00fe, 0x003e, 0x001c, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x0000, + 0x0001, 0x0002, 0x000c, 0x000d, 0x001d, 
0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffe, 0x3ffe, 0x3fff}, + {0x1ffc, 0x03fe, 0x01fc, 0x01fd, 0x007c, 0x007d, 0x001c, 0x001d, 0x000a, 0x0000, 0x0001, 0x0002, + 0x0003, 0x0004, 0x000b, 0x000c, 0x000d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffd, 0x1ffe, 0x1fff}, + {0x0ffe, 0x1ffe, 0x03fe, 0x00fe, 0x003c, 0x003d, 0x001a, 0x001b, 0x000a, 0x000b, 0x0000, 0x0001, + 0x0002, 0x0003, 0x0004, 0x000c, 0x001c, 0x001d, 0x003e, 0x007e, 0x01fe, 0x07fe, 0x3ffe, 0x3fff}, + {0x0ffe, 0x07fe, 0x01fe, 0x00fc, 0x00fd, 0x007c, 0x001c, 0x000a, 0x000b, 0x0000, 0x0001, 0x0002, + 0x0003, 0x0004, 0x000c, 0x000d, 0x001d, 0x001e, 0x007d, 0x00fe, 0x03fe, 0x1ffe, 0x3ffe, 0x3fff}, + {0x7ffc, 0xfffc, 0x7ffd, 0x0ffe, 0x03fe, 0x00fe, 0x003e, 0x001c, 0x000c, 0x0002, 0x0003, 0x0004, + 0x0000, 0x0005, 0x000d, 0x001d, 0x001e, 0x007e, 0x01fe, 0x07fe, 0x1ffe, 0xfffd, 0xfffe, 0xffff}, + {0x3ffc, 0x3ffd, 0x07fe, 0x03fe, 0x01fc, 0x007c, 0x007d, 0x001c, 0x001d, 0x000c, 0x0002, 0x0003, + 0x0000, 0x0004, 0x0005, 0x000d, 0x001e, 0x007e, 0x01fd, 0x01fe, 0x0ffe, 0x3ffe, 0x7ffe, 0x7fff}, + {0x01fc, 0x01fd, 0x01fe, 0x00fc, 0x007c, 0x003c, 0x001c, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x000d, 0x001d, 0x003d, 0x007d, 0x00fd, 0x03fe, 0x07fe, 0x0ffe, 0x1ffe, 0x1fff}, + {0x3ffc, 0x0ffe, 0x03fe, 0x00fc, 0x003c, 0x003d, 0x001c, 0x000c, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x000d, 0x001d, 0x003e, 0x00fd, 0x00fe, 0x01fe, 0x07fe, 0x3ffd, 0x3ffe, 0x3fff}, + {0x1ffe, 0x03fe, 0x01fc, 0x00fc, 0x003c, 0x003d, 0x001c, 0x000a, 0x000b, 0x000c, 0x0002, 0x0003, + 0x0000, 0x0004, 0x000d, 0x001d, 0x003e, 0x00fd, 0x01fd, 0x01fe, 0x07fe, 0x0ffe, 0x3ffe, 0x3fff}, + {0xfffc, 0x1ffe, 0x0ffe, 0x07fe, 0x01fe, 0x003e, 0x001c, 0x001d, 0x000a, 0x000b, 0x000c, 0x0002, + 0x0000, 0x0003, 0x0004, 0x000d, 0x001e, 0x007e, 0x00fe, 0x03fe, 0x3ffe, 0xfffd, 0xfffe, 0xffff}, + {0x1ffc, 0x3ffa, 0x3ffb, 0x3ffc, 0x03fe, 0x00fe, 0x007c, 0x007d, 0x001c, 0x000c, 0x0002, 0x0003, + 0x0000, 0x0004, 0x0005, 0x000d, 0x001d, 0x001e, 0x007e, 
0x01fe, 0x07fe, 0x3ffd, 0x3ffe, 0x3fff}, +}; + + +static const uint8_t cvh_huffbits0[191] = { + 1, 4, 6, 6, 7, 7, 8, 8, 8, 9, 9, 10, + 11, 11, 4, 5, 6, 7, 7, 8, 8, 9, 9, 9, + 9, 10, 11, 11, 5, 6, 7, 8, 8, 9, 9, 9, + 9, 10, 10, 10, 11, 12, 6, 7, 8, 9, 9, 9, + 9, 10, 10, 10, 10, 11, 12, 13, 7, 7, 8, 9, + 9, 9, 10, 10, 10, 10, 11, 11, 12, 13, 8, 8, + 9, 9, 9, 10, 10, 10, 10, 11, 11, 12, 13, 14, + 8, 8, 9, 9, 10, 10, 11, 11, 11, 12, 12, 13, + 13, 15, 8, 8, 9, 9, 10, 10, 11, 11, 11, 12, + 12, 13, 14, 15, 9, 9, 9, 10, 10, 10, 11, 11, + 12, 13, 12, 14, 15, 16, 9, 9, 10, 10, 10, 10, + 11, 12, 12, 14, 14, 16, 16, 0, 9, 9, 10, 10, + 11, 11, 12, 13, 13, 14, 14, 15, 0, 0, 10, 10, + 10, 11, 11, 12, 12, 13, 15, 15, 16, 0, 0, 0, + 11, 11, 11, 12, 13, 13, 13, 15, 16, 16, 0, 0, + 0, 0, 11, 11, 12, 13, 13, 14, 15, 16, 16, +}; + +static const uint16_t cvh_huffcodes0[191] = { + 0x0000,0x0008,0x002c,0x002d,0x0062,0x0063,0x00d4,0x00d5,0x00d6,0x01c6,0x01c7,0x03ca, + 0x07d6,0x07d7,0x0009,0x0014,0x002e,0x0064,0x0065,0x00d7,0x00d8,0x01c8,0x01c9,0x01ca, + 0x01cb,0x03cb,0x07d8,0x07d9,0x0015,0x002f,0x0066,0x00d9,0x00da,0x01cc,0x01cd,0x01ce, + 0x01cf,0x03cc,0x03cd,0x03ce,0x07da,0x0fe4,0x0030,0x0067,0x00db,0x01d0,0x01d1,0x01d2, + 0x01d3,0x03cf,0x03d0,0x03d1,0x03d2,0x07db,0x0fe5,0x1fea,0x0068,0x0069,0x00dc,0x01d4, + 0x01d5,0x01d6,0x03d3,0x03d4,0x03d5,0x03d6,0x07dc,0x07dd,0x0fe6,0x1feb,0x00dd,0x00de, + 0x01d7,0x01d8,0x01d9,0x03d7,0x03d8,0x03d9,0x03da,0x07de,0x07df,0x0fe7,0x1fec,0x3ff2, + 0x00df,0x00e0,0x01da,0x01db,0x03db,0x03dc,0x07e0,0x07e1,0x07e2,0x0fe8,0x0fe9,0x1fed, + 0x1fee,0x7ff4,0x00e1,0x00e2,0x01dc,0x01dd,0x03dd,0x03de,0x07e3,0x07e4,0x07e5,0x0fea, + 0x0feb,0x1fef,0x3ff3,0x7ff5,0x01de,0x01df,0x01e0,0x03df,0x03e0,0x03e1,0x07e6,0x07e7, + 0x0fec,0x1ff0,0x0fed,0x3ff4,0x7ff6,0xfff8,0x01e1,0x01e2,0x03e2,0x03e3,0x03e4,0x03e5, + 0x07e8,0x0fee,0x0fef,0x3ff5,0x3ff6,0xfff9,0xfffa,0xfffa,0x01e3,0x01e4,0x03e6,0x03e7, + 0x07e9,0x07ea,0x0ff0,0x1ff1,0x1ff2,0x3ff7,0x3ff8,0x7ff7,0x7ff7,0xfffa,0x03e8,0x03e9, 
+ 0x03ea,0x07eb,0x07ec,0x0ff1,0x0ff2,0x1ff3,0x7ff8,0x7ff9,0xfffb,0x3ff8,0x7ff7,0x7ff7, + 0x07ed,0x07ee,0x07ef,0x0ff3,0x1ff4,0x1ff5,0x1ff6,0x7ffa,0xfffc,0xfffd,0xfffb,0xfffb, + 0x3ff8,0x7ff7,0x07f0,0x07f1,0x0ff4,0x1ff7,0x1ff8,0x3ff9,0x7ffb,0xfffe,0xffff, +}; + + +static const uint8_t cvh_huffbits1[97] = { + 1, 4, 5, 6, 7, 8, 8, 9, 10, 10, 4, 5, + 6, 7, 7, 8, 8, 9, 9, 11, 5, 5, 6, 7, + 8, 8, 9, 9, 10, 11, 6, 6, 7, 8, 8, 9, + 9, 10, 11, 12, 7, 7, 8, 8, 9, 9, 10, 11, + 11, 13, 8, 8, 8, 9, 9, 10, 10, 11, 12, 14, + 8, 8, 8, 9, 10, 11, 11, 12, 13, 15, 9, 9, + 9, 10, 11, 12, 12, 14, 14, 0, 9, 9, 9, 10, + 11, 12, 14, 16, 0, 0, 10, 10, 11, 12, 13, 14, + 16, +}; + + +static const uint16_t cvh_huffcodes1[97] = { + 0x0000,0x0008,0x0014,0x0030,0x006a,0x00e2,0x00e3,0x01e4,0x03ec,0x03ed,0x0009,0x0015, + 0x0031,0x006b,0x006c,0x00e4,0x00e5,0x01e5,0x01e6,0x07f0,0x0016,0x0017,0x0032,0x006d, + 0x00e6,0x00e7,0x01e7,0x01e8,0x03ee,0x07f1,0x0033,0x0034,0x006e,0x00e8,0x00e9,0x01e9, + 0x01ea,0x03ef,0x07f2,0x0ff6,0x006f,0x0070,0x00ea,0x00eb,0x01eb,0x01ec,0x03f0,0x07f3, + 0x07f4,0x1ffa,0x00ec,0x00ed,0x00ee,0x01ed,0x01ee,0x03f1,0x03f2,0x07f5,0x0ff7,0x3ffa, + 0x00ef,0x00f0,0x00f1,0x01ef,0x03f3,0x07f6,0x07f7,0x0ff8,0x1ffb,0x7ffe,0x01f0,0x01f1, + 0x01f2,0x03f4,0x07f8,0x0ff9,0x0ffa,0x3ffb,0x3ffc,0x0000,0x01f3,0x01f4,0x01f5,0x03f5, + 0x07f9,0x0ffb,0x3ffd,0xfffe,0x0000,0x0000,0x03f6,0x03f7,0x07fa,0x0ffc,0x1ffc,0x3ffe, + 0xffff, +}; + +static const uint8_t cvh_huffbits2[48] = { + 1, 4, 5, 7, 8, 9, 10, 3, 4, 5, 7, 8, + 9, 10, 5, 5, 6, 7, 8, 10, 10, 7, 6, 7, + 8, 9, 10, 12, 8, 8, 8, 9, 10, 12, 14, 8, + 9, 9, 10, 11, 15, 16, 9, 10, 11, 12, 13, 16, +}; + +static const uint16_t cvh_huffcodes2[48] = { + 0x0000,0x000a,0x0018,0x0074,0x00f2,0x01f4,0x03f6,0x0004,0x000b,0x0019,0x0075,0x00f3, + 0x01f5,0x03f7,0x001a,0x001b,0x0038,0x0076,0x00f4,0x03f8,0x03f9,0x0077,0x0039,0x0078, + 0x00f5,0x01f6,0x03fa,0x0ffc,0x00f6,0x00f7,0x00f8,0x01f7,0x03fb,0x0ffd,0x3ffe,0x00f9, + 
0x01f8,0x01f9,0x03fc,0x07fc,0x7ffe,0xfffe,0x01fa,0x03fd,0x07fd,0x0ffe,0x1ffe,0xffff, +}; + +static const uint8_t cvh_huffbits3[607] = { + 2, 4, 6, 8, 10, 5, 5, 6, 8, 10, 7, 8, + 8, 10, 12, 9, 9, 10, 12, 15, 10, 11, 13, 16, + 16, 5, 6, 8, 10, 11, 5, 6, 8, 10, 12, 7, + 7, 8, 10, 13, 9, 9, 10, 12, 15, 12, 11, 13, + 16, 16, 7, 9, 10, 12, 15, 7, 8, 10, 12, 13, + 9, 9, 11, 13, 16, 11, 11, 12, 14, 16, 12, 12, + 14, 16, 0, 9, 11, 12, 16, 16, 9, 10, 13, 15, + 16, 10, 11, 12, 16, 16, 13, 13, 16, 16, 16, 16, + 16, 15, 16, 0, 11, 13, 16, 16, 15, 11, 13, 15, + 16, 16, 13, 13, 16, 16, 0, 14, 16, 16, 16, 0, + 16, 16, 0, 0, 0, 4, 6, 8, 10, 13, 6, 6, + 8, 10, 13, 9, 8, 10, 12, 16, 10, 10, 11, 15, + 16, 13, 12, 14, 16, 16, 5, 6, 8, 11, 13, 6, + 6, 8, 10, 13, 8, 8, 9, 11, 14, 10, 10, 12, + 12, 16, 13, 12, 13, 15, 16, 7, 8, 9, 12, 16, + 7, 8, 10, 12, 14, 9, 9, 10, 13, 16, 11, 10, + 12, 15, 16, 13, 13, 16, 16, 0, 9, 11, 13, 16, + 16, 9, 10, 12, 15, 16, 10, 11, 13, 16, 16, 13, + 12, 16, 16, 16, 16, 16, 16, 16, 0, 11, 13, 16, + 16, 16, 11, 13, 16, 16, 16, 12, 13, 15, 16, 0, + 16, 16, 16, 16, 0, 16, 16, 0, 0, 0, 6, 8, + 11, 13, 16, 8, 8, 10, 12, 16, 11, 10, 11, 13, + 16, 12, 13, 13, 15, 16, 16, 16, 14, 16, 0, 6, + 8, 10, 13, 16, 8, 8, 10, 12, 16, 10, 10, 11, + 13, 16, 13, 12, 13, 16, 16, 14, 14, 14, 16, 0, + 8, 9, 11, 13, 16, 8, 9, 11, 16, 14, 10, 10, + 12, 15, 16, 12, 12, 13, 16, 16, 15, 16, 16, 16, + 0, 10, 12, 15, 16, 16, 10, 12, 12, 14, 16, 12, + 12, 13, 16, 16, 14, 15, 16, 16, 0, 16, 16, 16, + 0, 0, 12, 15, 15, 16, 0, 13, 13, 16, 16, 0, + 14, 16, 16, 16, 0, 16, 16, 16, 0, 0, 0, 0, + 0, 0, 0, 8, 10, 13, 15, 16, 10, 11, 13, 16, + 16, 13, 13, 14, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 0, 8, 10, 11, 15, 16, 9, 10, 12, + 16, 16, 12, 12, 15, 16, 16, 16, 14, 16, 16, 16, + 16, 16, 16, 16, 0, 9, 11, 14, 16, 16, 10, 11, + 13, 16, 16, 14, 13, 14, 16, 16, 16, 15, 15, 16, + 0, 16, 16, 16, 0, 0, 11, 13, 16, 16, 16, 11, + 13, 15, 16, 16, 13, 16, 16, 16, 0, 16, 16, 16, + 16, 0, 16, 16, 0, 0, 
0, 15, 16, 16, 16, 0, + 14, 16, 16, 16, 0, 16, 16, 16, 0, 0, 16, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 9, 13, 16, 16, + 16, 11, 13, 16, 16, 16, 14, 15, 16, 16, 0, 15, + 16, 16, 16, 0, 16, 16, 0, 0, 0, 9, 13, 15, + 15, 16, 12, 13, 14, 16, 16, 16, 15, 16, 16, 0, + 16, 16, 16, 16, 0, 16, 16, 0, 0, 0, 11, 13, + 15, 16, 0, 12, 14, 16, 16, 0, 16, 16, 16, 16, + 0, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 16, + 16, 16, 16, 0, 16, 16, 16, 16, 0, 16, 16, 16, + 0, 0, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, + 16, 16, 0, 0, 0, 16, 16, +}; + + +static const uint16_t cvh_huffcodes3[607] = { + 0x0000,0x0004,0x0022,0x00c6,0x03b0,0x000c,0x000d,0x0023,0x00c7,0x03b1,0x005c,0x00c8, + 0x00c9,0x03b2,0x0fa4,0x01c2,0x01c3,0x03b3,0x0fa5,0x7f72,0x03b4,0x07b2,0x1f9a,0xff24, + 0xff25,0x000e,0x0024,0x00ca,0x03b5,0x07b3,0x000f,0x0025,0x00cb,0x03b6,0x0fa6,0x005d, + 0x005e,0x00cc,0x03b7,0x1f9b,0x01c4,0x01c5,0x03b8,0x0fa7,0x7f73,0x0fa8,0x07b4,0x1f9c, + 0xff26,0xff27,0x005f,0x01c6,0x03b9,0x0fa9,0x7f74,0x0060,0x00cd,0x03ba,0x0faa,0x1f9d, + 0x01c7,0x01c8,0x07b5,0x1f9e,0xff28,0x07b6,0x07b7,0x0fab,0x3fa2,0xff29,0x0fac,0x0fad, + 0x3fa3,0xff2a,0x3fa2,0x01c9,0x07b8,0x0fae,0xff2b,0xff2c,0x01ca,0x03bb,0x1f9f,0x7f75, + 0xff2d,0x03bc,0x07b9,0x0faf,0xff2e,0xff2f,0x1fa0,0x1fa1,0xff30,0xff31,0xff32,0xff33, + 0xff34,0x7f76,0xff35,0xff31,0x07ba,0x1fa2,0xff36,0xff37,0x7f77,0x07bb,0x1fa3,0x7f78, + 0xff38,0xff39,0x1fa4,0x1fa5,0xff3a,0xff3b,0xff2e,0x3fa4,0xff3c,0xff3d,0xff3e,0xff31, + 0xff3f,0xff40,0xff30,0xff31,0xff31,0x0005,0x0026,0x00ce,0x03bd,0x1fa6,0x0027,0x0028, + 0x00cf,0x03be,0x1fa7,0x01cb,0x00d0,0x03bf,0x0fb0,0xff41,0x03c0,0x03c1,0x07bc,0x7f79, + 0xff42,0x1fa8,0x0fb1,0x3fa5,0xff43,0xff44,0x0010,0x0029,0x00d1,0x07bd,0x1fa9,0x002a, + 0x002b,0x00d2,0x03c2,0x1faa,0x00d3,0x00d4,0x01cc,0x07be,0x3fa6,0x03c3,0x03c4,0x0fb2, + 0x0fb3,0xff45,0x1fab,0x0fb4,0x1fac,0x7f7a,0xff46,0x0061,0x00d5,0x01cd,0x0fb5,0xff47, + 0x0062,0x00d6,0x03c5,0x0fb6,0x3fa7,0x01ce,0x01cf,0x03c6,0x1fad,0xff48,0x07bf,0x03c7, + 
0x0fb7,0x7f7b,0xff49,0x1fae,0x1faf,0xff4a,0xff4b,0x7f7b,0x01d0,0x07c0,0x1fb0,0xff4c, + 0xff4d,0x01d1,0x03c8,0x0fb8,0x7f7c,0xff4e,0x03c9,0x07c1,0x1fb1,0xff4f,0xff50,0x1fb2, + 0x0fb9,0xff51,0xff52,0xff53,0xff54,0xff55,0xff56,0xff57,0xff52,0x07c2,0x1fb3,0xff58, + 0xff59,0xff5a,0x07c3,0x1fb4,0xff5b,0xff5c,0xff5d,0x0fba,0x1fb5,0x7f7d,0xff5e,0xff4f, + 0xff5f,0xff60,0xff61,0xff62,0xff52,0xff63,0xff64,0xff51,0xff52,0xff52,0x002c,0x00d7, + 0x07c4,0x1fb6,0xff65,0x00d8,0x00d9,0x03ca,0x0fbb,0xff66,0x07c5,0x03cb,0x07c6,0x1fb7, + 0xff67,0x0fbc,0x1fb8,0x1fb9,0x7f7e,0xff68,0xff69,0xff6a,0x3fa8,0xff6b,0x7f7e,0x002d, + 0x00da,0x03cc,0x1fba,0xff6c,0x00db,0x00dc,0x03cd,0x0fbd,0xff6d,0x03ce,0x03cf,0x07c7, + 0x1fbb,0xff6e,0x1fbc,0x0fbe,0x1fbd,0xff6f,0xff70,0x3fa9,0x3faa,0x3fab,0xff71,0xff6f, + 0x00dd,0x01d2,0x07c8,0x1fbe,0xff72,0x00de,0x01d3,0x07c9,0xff73,0x3fac,0x03d0,0x03d1, + 0x0fbf,0x7f7f,0xff74,0x0fc0,0x0fc1,0x1fbf,0xff75,0xff76,0x7f80,0xff77,0xff78,0xff79, + 0xff75,0x03d2,0x0fc2,0x7f81,0xff7a,0xff7b,0x03d3,0x0fc3,0x0fc4,0x3fad,0xff7c,0x0fc5, + 0x0fc6,0x1fc0,0xff7d,0xff7e,0x3fae,0x7f82,0xff7f,0xff80,0xff80,0xff81,0xff82,0xff83, + 0xff80,0xff80,0x0fc7,0x7f83,0x7f84,0xff84,0xff7a,0x1fc1,0x1fc2,0xff85,0xff86,0x3fad, + 0x3faf,0xff87,0xff88,0xff89,0xff7d,0xff8a,0xff8b,0xff8c,0xff80,0xff80,0x3fae,0x7f82, + 0xff7f,0xff80,0xff80,0x00df,0x03d4,0x1fc3,0x7f85,0xff8d,0x03d5,0x07ca,0x1fc4,0xff8e, + 0xff8f,0x1fc5,0x1fc6,0x3fb0,0xff90,0xff91,0xff92,0xff93,0xff94,0xff95,0xff96,0xff97, + 0xff98,0xff99,0xff9a,0xff95,0x00e0,0x03d6,0x07cb,0x7f86,0xff9b,0x01d4,0x03d7,0x0fc8, + 0xff9c,0xff9d,0x0fc9,0x0fca,0x7f87,0xff9e,0xff9f,0xffa0,0x3fb1,0xffa1,0xffa2,0xffa3, + 0xffa4,0xffa5,0xffa6,0xffa7,0xffa2,0x01d5,0x07cc,0x3fb2,0xffa8,0xffa9,0x03d8,0x07cd, + 0x1fc7,0xffaa,0xffab,0x3fb3,0x1fc8,0x3fb4,0xffac,0xffad,0xffae,0x7f88,0x7f89,0xffaf, + 0xffaf,0xffb0,0xffb1,0xffb2,0xffaf,0xffaf,0x07ce,0x1fc9,0xffb3,0xffb4,0xffb5,0x07cf, + 0x1fca,0x7f8a,0xffb6,0xffb7,0x1fcb,0xffb8,0xffb9,0xffba,0xffba,0xffbb,0xffbc,0xffbd, 
+ 0xffbe,0xffbe,0xffbf,0xffc0,0xffbd,0xffbe,0xffbe,0x7f8b,0xffc1,0xffc2,0xffc3,0xffb4, + 0x3fb5,0xffc4,0xffc5,0xffc6,0xffb6,0xffc7,0xffc8,0xffc9,0xffba,0xffba,0xffca,0xffcb, + 0xffbd,0xffbe,0xffbe,0xffbb,0xffbc,0xffbd,0xffbe,0xffbe,0x01d6,0x1fcc,0xffcc,0xffcd, + 0xffce,0x07d0,0x1fcd,0xffcf,0xffd0,0xffd1,0x3fb6,0x7f8c,0xffd2,0xffd3,0xff90,0x7f8d, + 0xffd4,0xffd5,0xffd6,0xff95,0xffd7,0xffd8,0xff94,0xff95,0xff95,0x01d7,0x1fce,0x7f8e, + 0x7f8f,0xffd9,0x0fcb,0x1fcf,0x3fb7,0xffda,0xffdb,0xffdc,0x7f90,0xffdd,0xffde,0xff9e, + 0xffdf,0xffe0,0xffe1,0xffe2,0xffa2,0xffe3,0xffe4,0xffa1,0xffa2,0xffa2,0x07d1,0x1fd0, + 0x7f91,0xffe5,0xffa8,0x0fcc,0x3fb8,0xffe6,0xffe7,0xffaa,0xffe8,0xffe9,0xffea,0xffeb, + 0xffac,0xffec,0xffed,0xffee,0xffaf,0xffaf,0xffae,0x7f88,0x7f89,0xffaf,0xffaf,0xffef, + 0xfff0,0xfff1,0xfff2,0xffb4,0xfff3,0xfff4,0xfff5,0xfff6,0xffb6,0xfff7,0xfff8,0xfff9, + 0xffba,0xffba,0xfffa,0xfffb,0xffbd,0xffbe,0xffbe,0xffbb,0xffbc,0xffbd,0xffbe,0xffbe, + 0xfffc,0xfffd,0xffb3,0xffb4,0xffb4,0xfffe,0xffff, +}; + +static const uint8_t cvh_huffbits4[246] = { + 2, 4, 7, 10, 4, 5, 7, 10, 7, 8, 10, 14, + 11, 11, 15, 15, 4, 5, 9, 12, 5, 5, 8, 12, + 8, 7, 10, 15, 11, 11, 15, 15, 7, 9, 12, 15, + 8, 8, 12, 15, 10, 10, 13, 15, 14, 14, 15, 0, + 11, 13, 15, 15, 11, 13, 15, 15, 14, 15, 15, 0, + 15, 15, 0, 0, 4, 5, 9, 13, 5, 6, 9, 13, + 9, 9, 11, 15, 14, 13, 15, 15, 4, 6, 9, 12, + 5, 6, 9, 13, 9, 8, 11, 15, 13, 12, 15, 15, + 7, 9, 12, 15, 7, 8, 11, 15, 10, 10, 14, 15, + 14, 15, 15, 0, 10, 12, 15, 15, 11, 13, 15, 15, + 15, 15, 15, 0, 15, 15, 0, 0, 6, 9, 13, 14, + 8, 9, 12, 15, 12, 12, 15, 15, 15, 15, 15, 0, + 7, 9, 13, 15, 8, 9, 12, 15, 11, 12, 15, 15, + 15, 15, 15, 0, 9, 11, 15, 15, 9, 11, 15, 15, + 14, 14, 15, 0, 15, 15, 0, 0, 14, 15, 15, 0, + 14, 15, 15, 0, 15, 15, 0, 0, 0, 0, 0, 0, + 9, 12, 15, 15, 12, 13, 15, 15, 15, 15, 15, 0, + 15, 15, 0, 0, 10, 12, 15, 15, 12, 14, 15, 15, + 15, 15, 15, 0, 15, 15, 0, 0, 14, 15, 15, 0, + 15, 15, 15, 0, 15, 15, 0, 0, 0, 0, 0, 0, + 15, 15, 0, 0, 15, 15, 
+}; + + +static const uint16_t cvh_huffcodes4[246] = { + 0x0000,0x0004,0x006c,0x03e6,0x0005,0x0012,0x006d,0x03e7,0x006e,0x00e8,0x03e8,0x3fc4, + 0x07e0,0x07e1,0x7fa4,0x7fa5,0x0006,0x0013,0x01e2,0x0fda,0x0014,0x0015,0x00e9,0x0fdb, + 0x00ea,0x006f,0x03e9,0x7fa6,0x07e2,0x07e3,0x7fa7,0x7fa8,0x0070,0x01e3,0x0fdc,0x7fa9, + 0x00eb,0x00ec,0x0fdd,0x7faa,0x03ea,0x03eb,0x1fd6,0x7fab,0x3fc5,0x3fc6,0x7fac,0x1fd6, + 0x07e4,0x1fd7,0x7fad,0x7fae,0x07e5,0x1fd8,0x7faf,0x7fb0,0x3fc7,0x7fb1,0x7fb2,0x1fd6, + 0x7fb3,0x7fb4,0x1fd6,0x1fd6,0x0007,0x0016,0x01e4,0x1fd9,0x0017,0x0032,0x01e5,0x1fda, + 0x01e6,0x01e7,0x07e6,0x7fb5,0x3fc8,0x1fdb,0x7fb6,0x7fb7,0x0008,0x0033,0x01e8,0x0fde, + 0x0018,0x0034,0x01e9,0x1fdc,0x01ea,0x00ed,0x07e7,0x7fb8,0x1fdd,0x0fdf,0x7fb9,0x7fba, + 0x0071,0x01eb,0x0fe0,0x7fbb,0x0072,0x00ee,0x07e8,0x7fbc,0x03ec,0x03ed,0x3fc9,0x7fbd, + 0x3fca,0x7fbe,0x7fbf,0x3fc9,0x03ee,0x0fe1,0x7fc0,0x7fc1,0x07e9,0x1fde,0x7fc2,0x7fc3, + 0x7fc4,0x7fc5,0x7fc6,0x3fc9,0x7fc7,0x7fc8,0x3fc9,0x3fc9,0x0035,0x01ec,0x1fdf,0x3fcb, + 0x00ef,0x01ed,0x0fe2,0x7fc9,0x0fe3,0x0fe4,0x7fca,0x7fcb,0x7fcc,0x7fcd,0x7fce,0x7fca, + 0x0073,0x01ee,0x1fe0,0x7fcf,0x00f0,0x01ef,0x0fe5,0x7fd0,0x07ea,0x0fe6,0x7fd1,0x7fd2, + 0x7fd3,0x7fd4,0x7fd5,0x7fd1,0x01f0,0x07eb,0x7fd6,0x7fd7,0x01f1,0x07ec,0x7fd8,0x7fd9, + 0x3fcc,0x3fcd,0x7fda,0x7fda,0x7fdb,0x7fdc,0x7fda,0x7fda,0x3fce,0x7fdd,0x7fde,0x7fd6, + 0x3fcf,0x7fdf,0x7fe0,0x7fd8,0x7fe1,0x7fe2,0x7fda,0x7fda,0x3fcc,0x3fcd,0x7fda,0x7fda, + 0x01f2,0x0fe7,0x7fe3,0x7fe4,0x0fe8,0x1fe1,0x7fe5,0x7fe6,0x7fe7,0x7fe8,0x7fe9,0x7fca, + 0x7fea,0x7feb,0x7fca,0x7fca,0x03ef,0x0fe9,0x7fec,0x7fed,0x0fea,0x3fd0,0x7fee,0x7fef, + 0x7ff0,0x7ff1,0x7ff2,0x7fd1,0x7ff3,0x7ff4,0x7fd1,0x7fd1,0x3fd1,0x7ff5,0x7ff6,0x7fd6, + 0x7ff7,0x7ff8,0x7ff9,0x7fd8,0x7ffa,0x7ffb,0x7fda,0x7fda,0x3fcc,0x3fcd,0x7fda,0x7fda, + 0x7ffc,0x7ffd,0x7fd6,0x7fd6,0x7ffe,0x7fff, +}; + + +static const uint8_t cvh_huffbits5[230] = { + 2, 4, 8, 4, 5, 9, 9, 10, 14, 4, 6, 11, + 5, 6, 12, 10, 11, 15, 9, 11, 15, 10, 13, 15, + 14, 15, 0, 4, 6, 
12, 6, 7, 12, 12, 12, 15, + 5, 7, 13, 6, 7, 13, 12, 13, 15, 10, 12, 15, + 11, 13, 15, 15, 15, 0, 8, 13, 15, 11, 12, 15, + 15, 15, 0, 10, 13, 15, 12, 15, 15, 15, 15, 0, + 15, 15, 0, 15, 15, 0, 0, 0, 0, 4, 5, 11, + 5, 7, 12, 11, 12, 15, 6, 7, 13, 7, 8, 14, + 12, 14, 15, 11, 13, 15, 12, 13, 15, 15, 15, 0, + 5, 6, 13, 7, 8, 15, 12, 14, 15, 6, 8, 14, + 7, 8, 15, 14, 15, 15, 12, 12, 15, 12, 13, 15, + 15, 15, 0, 9, 13, 15, 12, 13, 15, 15, 15, 0, + 11, 13, 15, 13, 13, 15, 15, 15, 0, 14, 15, 0, + 15, 15, 0, 0, 0, 0, 8, 10, 15, 11, 12, 15, + 15, 15, 0, 10, 12, 15, 12, 13, 15, 15, 15, 0, + 14, 15, 0, 15, 15, 0, 0, 0, 0, 8, 12, 15, + 12, 13, 15, 15, 15, 0, 11, 13, 15, 13, 15, 15, + 15, 15, 0, 15, 15, 0, 15, 15, 0, 0, 0, 0, + 14, 15, 0, 15, 15, 0, 0, 0, 0, 15, 15, 0, + 15, 15, +}; + + + +static const uint16_t cvh_huffcodes5[230] = { + 0x0000,0x0004,0x00f0,0x0005,0x0012,0x01f0,0x01f1,0x03e8,0x3fce,0x0006,0x0030,0x07de, + 0x0013,0x0031,0x0fd2,0x03e9,0x07df,0x7fb0,0x01f2,0x07e0,0x7fb1,0x03ea,0x1fd2,0x7fb2, + 0x3fcf,0x7fb3,0x0031,0x0007,0x0032,0x0fd3,0x0033,0x0070,0x0fd4,0x0fd5,0x0fd6,0x7fb4, + 0x0014,0x0071,0x1fd3,0x0034,0x0072,0x1fd4,0x0fd7,0x1fd5,0x7fb5,0x03eb,0x0fd8,0x7fb6, + 0x07e1,0x1fd6,0x7fb7,0x7fb8,0x7fb9,0x0072,0x00f1,0x1fd7,0x7fba,0x07e2,0x0fd9,0x7fbb, + 0x7fbc,0x7fbd,0x0070,0x03ec,0x1fd8,0x7fbe,0x0fda,0x7fbf,0x7fc0,0x7fc1,0x7fc2,0x0072, + 0x7fc3,0x7fc4,0x0071,0x7fc5,0x7fc6,0x0072,0x0034,0x0072,0x0072,0x0008,0x0015,0x07e3, + 0x0016,0x0073,0x0fdb,0x07e4,0x0fdc,0x7fc7,0x0035,0x0074,0x1fd9,0x0075,0x00f2,0x3fd0, + 0x0fdd,0x3fd1,0x7fc8,0x07e5,0x1fda,0x7fc9,0x0fde,0x1fdb,0x7fca,0x7fcb,0x7fcc,0x00f2, + 0x0017,0x0036,0x1fdc,0x0076,0x00f3,0x7fcd,0x0fdf,0x3fd2,0x7fce,0x0037,0x00f4,0x3fd3, + 0x0077,0x00f5,0x7fcf,0x3fd4,0x7fd0,0x7fd1,0x0fe0,0x0fe1,0x7fd2,0x0fe2,0x1fdd,0x7fd3, + 0x7fd4,0x7fd5,0x00f5,0x01f3,0x1fde,0x7fd6,0x0fe3,0x1fdf,0x7fd7,0x7fd8,0x7fd9,0x00f3, + 0x07e6,0x1fe0,0x7fda,0x1fe1,0x1fe2,0x7fdb,0x7fdc,0x7fdd,0x00f5,0x3fd5,0x7fde,0x00f4, + 
0x7fdf,0x7fe0,0x00f5,0x0077,0x00f5,0x00f5,0x00f6,0x03ed,0x7fe1,0x07e7,0x0fe4,0x7fe2, + 0x7fe3,0x7fe4,0x0073,0x03ee,0x0fe5,0x7fe5,0x0fe6,0x1fe3,0x7fe6,0x7fe7,0x7fe8,0x00f2, + 0x3fd6,0x7fe9,0x0074,0x7fea,0x7feb,0x00f2,0x0075,0x00f2,0x00f2,0x00f7,0x0fe7,0x7fec, + 0x0fe8,0x1fe4,0x7fed,0x7fee,0x7fef,0x00f3,0x07e8,0x1fe5,0x7ff0,0x1fe6,0x7ff1,0x7ff2, + 0x7ff3,0x7ff4,0x00f5,0x7ff5,0x7ff6,0x00f4,0x7ff7,0x7ff8,0x00f5,0x0077,0x00f5,0x00f5, + 0x3fd7,0x7ff9,0x0036,0x7ffa,0x7ffb,0x00f3,0x0076,0x00f3,0x00f3,0x7ffc,0x7ffd,0x0000, + 0x7ffe,0x7fff, +}; + + +static const uint8_t cvh_huffbits6[32] = { + 1, 4, 4, 6, 4, 6, 6, 8, 4, 6, 6, 8, + 6, 9, 8, 10, 4, 6, 7, 8, 6, 9, 8, 11, + 6, 9, 8, 10, 8, 10, 9, 11, +}; + +static const uint16_t cvh_huffcodes6[32] = { + 0x0000,0x0008,0x0009,0x0034,0x000a,0x0035,0x0036,0x00f6,0x000b,0x0037,0x0038,0x00f7, + 0x0039,0x01fa,0x00f8,0x03fc,0x000c,0x003a,0x007a,0x00f9,0x003b,0x01fb,0x00fa,0x07fe, + 0x003c,0x01fc,0x00fb,0x03fd,0x00fc,0x03fe,0x01fd,0x07ff, +}; + +static const uint16_t* cvh_huffcodes[7] = { + cvh_huffcodes0, cvh_huffcodes1, cvh_huffcodes2, cvh_huffcodes3, + cvh_huffcodes4, cvh_huffcodes5, cvh_huffcodes6, +}; + +static const uint8_t* cvh_huffbits[7] = { + cvh_huffbits0, cvh_huffbits1, cvh_huffbits2, cvh_huffbits3, + cvh_huffbits4, cvh_huffbits5, cvh_huffbits6, +}; + + +static const uint16_t ccpl_huffcodes2[3] = { + 0x02,0x00,0x03, +}; + +static const uint16_t ccpl_huffcodes3[7] = { + 0x3e,0x1e,0x02,0x00,0x06,0x0e,0x3f, +}; + +static const uint16_t ccpl_huffcodes4[15] = { + 0xfc,0xfd,0x7c,0x3c,0x1c,0x0c,0x04,0x00,0x05,0x0d,0x1d,0x3d, + 0x7d,0xfe,0xff, +}; + +static const uint16_t ccpl_huffcodes5[31] = { + 0x03f8,0x03f9,0x03fa,0x03fb,0x01f8,0x01f9,0x00f8,0x00f9,0x0078,0x0079,0x0038,0x0039, + 0x0018,0x0019,0x0004,0x0000,0x0005,0x001a,0x001b,0x003a,0x003b,0x007a,0x007b,0x00fa, + 0x00fb,0x01fa,0x01fb,0x03fc,0x03fd,0x03fe,0x03ff, +}; + +static const uint16_t ccpl_huffcodes6[63] = { + 
0x0004,0x0005,0x0005,0x0006,0x0006,0x0007,0x0007,0x0007,0x0007,0x0008,0x0008,0x0008, + 0x0008,0x0009,0x0009,0x0009,0x0009,0x000a,0x000a,0x000a,0x000a,0x000a,0x000b,0x000b, + 0x000b,0x000b,0x000c,0x000d,0x000e,0x000e,0x0010,0x0000,0x000a,0x0018,0x0019,0x0036, + 0x0037,0x0074,0x0075,0x0076,0x0077,0x00f4,0x00f5,0x00f6,0x00f7,0x01f5,0x01f6,0x01f7, + 0x01f8,0x03f6,0x03f7,0x03f8,0x03f9,0x03fa,0x07fa,0x07fb,0x07fc,0x07fd,0x0ffd,0x1ffd, + 0x3ffd,0x3ffe,0xffff, +}; + +static const uint8_t ccpl_huffbits2[3] = { + 2,1,2, +}; + +static const uint8_t ccpl_huffbits3[7] = { + 6,5,2,1,3,4,6, +}; + +static const uint8_t ccpl_huffbits4[15] = { + 8,8,7,6,5,4,3,1,3,4,5,6,7,8,8, +}; + +static const uint8_t ccpl_huffbits5[31] = { + 10,10,10,10,9,9,8,8,7,7,6,6, + 5,5,3,1,3,5,5,6,6,7,7,8, + 8,9,9,10,10,10,10, +}; + +static const uint8_t ccpl_huffbits6[63] = { + 16,15,14,13,12,11,11,11,11,10,10,10, + 10,9,9,9,9,9,8,8,8,8,7,7, + 7,7,6,6,5,5,3,1,4,5,5,6, + 6,7,7,7,7,8,8,8,8,9,9,9, + 9,10,10,10,10,10,11,11,11,11,12,13, + 14,14,16, +}; + +static const uint16_t* ccpl_huffcodes[5] = { + ccpl_huffcodes2,ccpl_huffcodes3, + ccpl_huffcodes4,ccpl_huffcodes5,ccpl_huffcodes6 +}; + +static const uint8_t* ccpl_huffbits[5] = { + ccpl_huffbits2,ccpl_huffbits3, + ccpl_huffbits4,ccpl_huffbits5,ccpl_huffbits6 +}; + + +//Coupling tables + +static const int cplband[51] = { + 0,1,2,3,4,5,6,7,8,9, + 10,11,11,12,12,13,13,14,14,14, + 15,15,15,15,16,16,16,16,16,17, + 17,17,17,17,17,18,18,18,18,18, + 18,18,19,19,19,19,19,19,19,19, + 19, +}; + +static const float cplscale2[3] = { +0.953020632266998,0.70710676908493,0.302905440330505, +}; + +static const float cplscale3[7] = { +0.981279790401459,0.936997592449188,0.875934481620789,0.70710676908493, +0.482430040836334,0.349335819482803,0.192587479948997, +}; + +static const float cplscale4[15] = { +0.991486728191376,0.973249018192291,0.953020632266998,0.930133521556854, +0.903453230857849,0.870746195316315,0.826180458068848,0.70710676908493, 
+0.563405573368073,0.491732746362686,0.428686618804932,0.367221474647522, +0.302905440330505,0.229752898216248,0.130207896232605, +}; + +static const float cplscale5[31] = { +0.995926380157471,0.987517595291138,0.978726446628571,0.969505727291107, +0.95979779958725,0.949531257152557,0.938616216182709,0.926936149597168, +0.914336204528809,0.900602877140045,0.885426938533783,0.868331849575043, +0.84851086139679,0.824381768703461,0.791833400726318,0.70710676908493, +0.610737144947052,0.566034197807312,0.529177963733673,0.495983630418777, +0.464778542518616,0.434642940759659,0.404955863952637,0.375219136476517, +0.344963222742081,0.313672333955765,0.280692428350449,0.245068684220314, +0.205169528722763,0.157508864998817,0.0901700109243393, +}; + +static const float cplscale6[63] = { +0.998005926609039,0.993956744670868,0.989822506904602,0.985598564147949, +0.981279790401459,0.976860702037811,0.972335040569305,0.967696130275726, +0.962936460971832,0.958047747612000,0.953020632266998,0.947844684123993, +0.942508161067963,0.936997592449188,0.931297719478607,0.925390899181366, +0.919256627559662,0.912870943546295,0.906205296516418,0.899225592613220, +0.891890347003937,0.884148240089417,0.875934481620789,0.867165684700012, +0.857730865478516,0.847477376461029,0.836184680461884,0.823513329029083, +0.808890223503113,0.791194140911102,0.767520070075989,0.707106769084930, +0.641024887561798,0.611565053462982,0.587959706783295,0.567296981811523, +0.548448026180267,0.530831515789032,0.514098942279816,0.498019754886627, +0.482430040836334,0.467206478118896,0.452251672744751,0.437485188245773, +0.422837972640991,0.408248275518417,0.393658757209778,0.379014074802399, +0.364258885383606,0.349335819482803,0.334183186292648,0.318732559680939, +0.302905440330505,0.286608695983887,0.269728302955627,0.252119421958923, +0.233590632677078,0.213876649737358,0.192587479948997,0.169101938605309, +0.142307326197624,0.109772264957428,0.0631198287010193, +}; + +static const float* cplscales[5] = 
{ + cplscale2, cplscale3, cplscale4, cplscale5, cplscale6, +}; diff --git a/mpeg4/src/libavcodec/cscd.c b/mpeg4/src/libavcodec/cscd.c new file mode 100644 index 0000000000000000000000000000000000000000..0d6e045260f7d877392f0505e8999372d769d1ef --- /dev/null +++ b/mpeg4/src/libavcodec/cscd.c @@ -0,0 +1,262 @@ +/* + * CamStudio decoder + * Copyright (c) 2006 Reimar Doeffinger + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include + +#include "common.h" +#include "avcodec.h" + +#ifdef CONFIG_ZLIB +#include +#endif +#include "lzo.h" + +typedef struct { + AVFrame pic; + int linelen, height, bpp; + unsigned int decomp_size; + unsigned char* decomp_buf; +} CamStudioContext; + +static void copy_frame_default(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + memcpy(dst, src, linelen); + src += linelen; + dst -= f->linesize[0]; + } +} + +static void add_frame_default(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i, j; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + for (j = linelen; j; j--) + *dst++ += *src++; + dst -= f->linesize[0] + linelen; + } +} + +#ifndef WORDS_BIGENDIAN +#define 
copy_frame_16 copy_frame_default +#define copy_frame_32 copy_frame_default +#define add_frame_16 add_frame_default +#define add_frame_32 add_frame_default +#else +static void copy_frame_16(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i, j; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + for (j = linelen / 2; j; j--) { + dst[0] = src[1]; + dst[1] = src[0]; + src += 2; + dst += 2; + } + dst -= f->linesize[0] + linelen; + } +} + +static void copy_frame_32(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i, j; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + for (j = linelen / 4; j; j--) { + dst[0] = src[3]; + dst[1] = src[2]; + dst[2] = src[1]; + dst[3] = src[0]; + src += 4; + dst += 4; + } + dst -= f->linesize[0] + linelen; + } +} + +static void add_frame_16(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i, j; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + for (j = linelen / 2; j; j--) { + dst[0] += src[1]; + dst[1] += src[0]; + src += 2; + dst += 2; + } + dst -= f->linesize[0] + linelen; + } +} + +static void add_frame_32(AVFrame *f, uint8_t *src, + int linelen, int height) { + int i, j; + uint8_t *dst = f->data[0]; + dst += (height - 1) * f->linesize[0]; + for (i = height; i; i--) { + for (j = linelen / 4; j; j--) { + dst[0] += src[3]; + dst[1] += src[2]; + dst[2] += src[1]; + dst[3] += src[0]; + src += 4; + dst += 4; + } + dst -= f->linesize[0] + linelen; + } +} +#endif + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buf, int buf_size) { + CamStudioContext *c = (CamStudioContext *)avctx->priv_data; + AVFrame *picture = data; + + if (buf_size < 2) { + av_log(avctx, AV_LOG_ERROR, "coded frame too small\n"); + return -1; + } + + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + c->pic.reference = 1; + 
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_READABLE | + FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->get_buffer(avctx, &c->pic) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + // decompress data + switch ((buf[0] >> 1) & 7) { + case 0: { // lzo compression + int outlen = c->decomp_size, inlen = buf_size - 2; + if (lzo1x_decode(c->decomp_buf, &outlen, &buf[2], &inlen)) + av_log(avctx, AV_LOG_ERROR, "error during lzo decompression\n"); + break; + } + case 1: { // zlib compression +#ifdef CONFIG_ZLIB + unsigned long dlen = c->decomp_size; + if (uncompress(c->decomp_buf, &dlen, &buf[2], buf_size - 2) != Z_OK) + av_log(avctx, AV_LOG_ERROR, "error during zlib decompression\n"); + break; +#else + av_log(avctx, AV_LOG_ERROR, "compiled without zlib support\n"); + return -1; +#endif + } + default: + av_log(avctx, AV_LOG_ERROR, "unknown compression\n"); + return -1; + } + + // flip upside down, add difference frame + if (buf[0] & 1) { // keyframe + c->pic.pict_type = FF_I_TYPE; + c->pic.key_frame = 1; + switch (c->bpp) { + case 16: + copy_frame_16(&c->pic, c->decomp_buf, c->linelen, c->height); + break; + case 32: + copy_frame_32(&c->pic, c->decomp_buf, c->linelen, c->height); + break; + default: + copy_frame_default(&c->pic, c->decomp_buf, c->linelen, c->height); + } + } else { + c->pic.pict_type = FF_P_TYPE; + c->pic.key_frame = 0; + switch (c->bpp) { + case 16: + add_frame_16(&c->pic, c->decomp_buf, c->linelen, c->height); + break; + case 32: + add_frame_32(&c->pic, c->decomp_buf, c->linelen, c->height); + break; + default: + add_frame_default(&c->pic, c->decomp_buf, c->linelen, c->height); + } + } + + *picture = c->pic; + *data_size = sizeof(AVFrame); + return buf_size; +} + +static int decode_init(AVCodecContext *avctx) { + CamStudioContext *c = (CamStudioContext *)avctx->priv_data; + if (avcodec_check_dimensions(avctx, avctx->height, avctx->width) < 0) { + return 1; + } + avctx->has_b_frames = 
0; + switch (avctx->bits_per_sample) { + case 16: avctx->pix_fmt = PIX_FMT_RGB565; break; + case 24: avctx->pix_fmt = PIX_FMT_BGR24; break; + case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break; + default: + av_log(avctx, AV_LOG_ERROR, + "CamStudio codec error: unvalid depth %i bpp\n", + avctx->bits_per_sample); + return 1; + } + c->bpp = avctx->bits_per_sample; + c->pic.data[0] = NULL; + c->linelen = avctx->width * avctx->bits_per_sample / 8; + c->height = avctx->height; + c->decomp_size = c->height * c->linelen; + c->decomp_buf = av_malloc(c->decomp_size + LZO_OUTPUT_PADDING); + if (!c->decomp_buf) { + av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n"); + return 1; + } + return 0; +} + +static int decode_end(AVCodecContext *avctx) { + CamStudioContext *c = (CamStudioContext *)avctx->priv_data; + av_freep(&c->decomp_buf); + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + return 0; +} + +AVCodec cscd_decoder = { + "camstudio", + CODEC_TYPE_VIDEO, + CODEC_ID_CSCD, + sizeof(CamStudioContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + diff --git a/mpeg4/src/libavcodec/cyuv.c b/mpeg4/src/libavcodec/cyuv.c new file mode 100644 index 0000000000000000000000000000000000000000..b64e1a58bf732ef357f5813346c960d02dda7f9e --- /dev/null +++ b/mpeg4/src/libavcodec/cyuv.c @@ -0,0 +1,188 @@ +/* + * + * Copyright (C) 2003 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Creative YUV (CYUV) Video Decoder + * by Mike Melanson (melanson@pcisys.net) + * based on "Creative YUV (CYUV) stream format for AVI": + * http://www.csse.monash.edu.au/~timf/videocodec/cyuv.txt + * + */ + +/** + * @file cyuv.c + * Creative YUV (CYUV) Video Decoder. + */ + +#include +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + + +typedef struct CyuvDecodeContext { + AVCodecContext *avctx; + int width, height; + AVFrame frame; +} CyuvDecodeContext; + +static int cyuv_decode_init(AVCodecContext *avctx) +{ + CyuvDecodeContext *s = avctx->priv_data; + + s->avctx = avctx; + s->width = avctx->width; + /* width needs to be divisible by 4 for this codec to work */ + if (s->width & 0x3) + return -1; + s->height = avctx->height; + avctx->pix_fmt = PIX_FMT_YUV411P; + avctx->has_b_frames = 0; + + return 0; +} + +static int cyuv_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + CyuvDecodeContext *s=avctx->priv_data; + + unsigned char *y_plane; + unsigned char *u_plane; + unsigned char *v_plane; + int y_ptr; + int u_ptr; + int v_ptr; + + /* prediction error tables (make it clear that they are signed values) */ + signed char *y_table = buf + 0; + signed char *u_table = buf + 16; + signed char *v_table = buf + 32; + + unsigned char y_pred, u_pred, v_pred; + int stream_ptr; + unsigned char cur_byte; + int pixel_groups; + + /* sanity check the buffer size: A buffer has 3x16-bytes tables + * followed by (height) lines each with 3 bytes to represent groups + * of 4 pixels. 
Thus, the total size of the buffer ought to be: + * (3 * 16) + height * (width * 3 / 4) */ + if (buf_size != 48 + s->height * (s->width * 3 / 4)) { + av_log(avctx, AV_LOG_ERROR, "ffmpeg: cyuv: got a buffer with %d bytes when %d were expected\n", + buf_size, + 48 + s->height * (s->width * 3 / 4)); + return -1; + } + + /* pixel data starts 48 bytes in, after 3x16-byte tables */ + stream_ptr = 48; + + if(s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID; + s->frame.reference = 0; + if(avctx->get_buffer(avctx, &s->frame) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + y_plane = s->frame.data[0]; + u_plane = s->frame.data[1]; + v_plane = s->frame.data[2]; + + /* iterate through each line in the height */ + for (y_ptr = 0, u_ptr = 0, v_ptr = 0; + y_ptr < (s->height * s->frame.linesize[0]); + y_ptr += s->frame.linesize[0] - s->width, + u_ptr += s->frame.linesize[1] - s->width / 4, + v_ptr += s->frame.linesize[2] - s->width / 4) { + + /* reset predictors */ + cur_byte = buf[stream_ptr++]; + u_plane[u_ptr++] = u_pred = cur_byte & 0xF0; + y_plane[y_ptr++] = y_pred = (cur_byte & 0x0F) << 4; + + cur_byte = buf[stream_ptr++]; + v_plane[v_ptr++] = v_pred = cur_byte & 0xF0; + y_pred += y_table[cur_byte & 0x0F]; + y_plane[y_ptr++] = y_pred; + + cur_byte = buf[stream_ptr++]; + y_pred += y_table[cur_byte & 0x0F]; + y_plane[y_ptr++] = y_pred; + y_pred += y_table[(cur_byte & 0xF0) >> 4]; + y_plane[y_ptr++] = y_pred; + + /* iterate through the remaining pixel groups (4 pixels/group) */ + pixel_groups = s->width / 4 - 1; + while (pixel_groups--) { + + cur_byte = buf[stream_ptr++]; + u_pred += u_table[(cur_byte & 0xF0) >> 4]; + u_plane[u_ptr++] = u_pred; + y_pred += y_table[cur_byte & 0x0F]; + y_plane[y_ptr++] = y_pred; + + cur_byte = buf[stream_ptr++]; + v_pred += v_table[(cur_byte & 0xF0) >> 4]; + v_plane[v_ptr++] = v_pred; + y_pred += y_table[cur_byte & 0x0F]; + y_plane[y_ptr++] = y_pred; 
+ + cur_byte = buf[stream_ptr++]; + y_pred += y_table[cur_byte & 0x0F]; + y_plane[y_ptr++] = y_pred; + y_pred += y_table[(cur_byte & 0xF0) >> 4]; + y_plane[y_ptr++] = y_pred; + + } + } + + *data_size=sizeof(AVFrame); + *(AVFrame*)data= s->frame; + + return buf_size; +} + +static int cyuv_decode_end(AVCodecContext *avctx) +{ +/* CyuvDecodeContext *s = avctx->priv_data;*/ + + return 0; +} + +AVCodec cyuv_decoder = { + "cyuv", + CODEC_TYPE_VIDEO, + CODEC_ID_CYUV, + sizeof(CyuvDecodeContext), + cyuv_decode_init, + NULL, + cyuv_decode_end, + cyuv_decode_frame, + CODEC_CAP_DR1, + NULL +}; + diff --git a/mpeg4/src/libavcodec/dct-test.c b/mpeg4/src/libavcodec/dct-test.c new file mode 100644 index 0000000000000000000000000000000000000000..232278c8aad10918e18f53f14c12d3b381e0ba27 --- /dev/null +++ b/mpeg4/src/libavcodec/dct-test.c @@ -0,0 +1,516 @@ +/** + * @file dct-test.c + * DCT test. (c) 2001 Fabrice Bellard. + * Started from sample code by Juan J. Sierralta P. + */ + +#include +#include +#include +#include +#include + +#include "dsputil.h" + +#include "i386/mmx.h" +#include "simple_idct.h" +#include "faandct.h" + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? 
(a) : (b)) +#endif + +#undef printf + +void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);}; + +/* reference fdct/idct */ +extern void fdct(DCTELEM *block); +extern void idct(DCTELEM *block); +extern void ff_idct_xvid_mmx(DCTELEM *block); +extern void ff_idct_xvid_mmx2(DCTELEM *block); +extern void init_fdct(); + +extern void j_rev_dct(DCTELEM *data); +extern void ff_mmx_idct(DCTELEM *data); +extern void ff_mmxext_idct(DCTELEM *data); + +extern void odivx_idct_c (short *block); + +#define AANSCALE_BITS 12 +static const unsigned short aanscales[64] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 +}; + +uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; + +int64_t gettime(void) +{ + struct timeval tv; + gettimeofday(&tv,NULL); + return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; +} + +#define NB_ITS 20000 +#define NB_ITS_SPEED 50000 + +static short idct_mmx_perm[64]; + +static short idct_simple_mmx_perm[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + +void idct_mmx_init(void) +{ + int i; + + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); +// 
idct_simple_mmx_perm[i] = simple_block_permute_op(i); + } +} + +static DCTELEM block[64] __attribute__ ((aligned (8))); +static DCTELEM block1[64] __attribute__ ((aligned (8))); +static DCTELEM block_org[64] __attribute__ ((aligned (8))); + +void dct_error(const char *name, int is_idct, + void (*fdct_func)(DCTELEM *block), + void (*fdct_ref)(DCTELEM *block), int test) +{ + int it, i, scale; + int err_inf, v; + int64_t err2, ti, ti1, it1; + int64_t sysErr[64], sysErrMax=0; + int maxout=0; + int blockSumErrMax=0, blockSumErr; + + srandom(0); + + err_inf = 0; + err2 = 0; + for(i=0; i<64; i++) sysErr[i]=0; + for(it=0;it>=3; + } + break; + case 1:{ + int num= (random()%10)+1; + for(i=0;i> AANSCALE_BITS; + } + } + + fdct_ref(block1); + + blockSumErr=0; + for(i=0;i<64;i++) { + v = abs(block[i] - block1[i]); + if (v > err_inf) + err_inf = v; + err2 += v * v; + sysErr[i] += block[i] - block1[i]; + blockSumErr += v; + if( abs(block[i])>maxout) maxout=abs(block[i]); + } + if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr; +#if 0 // print different matrix pairs + if(blockSumErr){ + printf("\n"); + for(i=0; i<64; i++){ + if((i&7)==0) printf("\n"); + printf("%4d ", block_org[i]); + } + for(i=0; i<64; i++){ + if((i&7)==0) printf("\n"); + printf("%4d ", block[i] - block1[i]); + } + } +#endif + } + for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, ABS(sysErr[i])); + +#if 1 // dump systematic errors + for(i=0; i<64; i++){ + if(i%8==0) printf("\n"); + printf("%5d ", (int)sysErr[i]); + } + printf("\n"); +#endif + + printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", + is_idct ? 
"IDCT" : "DCT", + name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax); +#if 1 //Speed test + /* speed test */ + for(i=0;i<64;i++) + block1[i] = 0; + switch(test){ + case 0: + for(i=0;i<64;i++) + block1[i] = (random() % 512) -256; + if (is_idct){ + fdct(block1); + + for(i=0;i<64;i++) + block1[i]>>=3; + } + break; + case 1:{ + case 2: + block1[0] = (random() % 512) -256; + block1[1] = (random() % 512) -256; + block1[2] = (random() % 512) -256; + block1[3] = (random() % 512) -256; + }break; + } + + if (fdct_func == ff_mmx_idct || + fdct_func == j_rev_dct || fdct_func == ff_mmxext_idct) { + for(i=0;i<64;i++) + block[idct_mmx_perm[i]] = block1[i]; + } else if(fdct_func == ff_simple_idct_mmx ) { + for(i=0;i<64;i++) + block[idct_simple_mmx_perm[i]] = block1[i]; + } else { + for(i=0; i<64; i++) + block[i]= block1[i]; + } + + ti = gettime(); + it1 = 0; + do { + for(it=0;it 255) + v = 255; + dest[i * linesize + j] = (int)rint(v); + } + } +} + +void idct248_error(const char *name, + void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block)) +{ + int it, i, it1, ti, ti1, err_max, v; + + srandom(0); + + /* just one test to see if code is correct (precision is less + important here) */ + err_max = 0; + for(it=0;it err_max) + err_max = v; + } +#if 0 + printf("ref=\n"); + for(i=0;i<8;i++) { + int j; + for(j=0;j<8;j++) { + printf(" %3d", img_dest1[i*8+j]); + } + printf("\n"); + } + + printf("out=\n"); + for(i=0;i<8;i++) { + int j; + for(j=0;j<8;j++) { + printf(" %3d", img_dest[i*8+j]); + } + printf("\n"); + } +#endif + } + printf("%s %s: err_inf=%d\n", + 1 ? "IDCT248" : "DCT248", + name, err_max); + + ti = gettime(); + it1 = 0; + do { + for(it=0;it]\n" + "test-number 0 -> test with random matrixes\n" + " 1 -> test with random sparse matrixes\n" + " 2 -> do 3. 
test from mpeg4 std\n" + "-i test IDCT implementations\n" + "-4 test IDCT248 implementations\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int test_idct = 0, test_248_dct = 0; + int c,i; + int test=1; + + init_fdct(); + idct_mmx_init(); + + for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; + for(i=0;i 32767) x = 32767; +#define SE_16BIT(x) if (x & 0x8000) x -= 0x10000; + +static int interplay_delta_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 47, 51, 56, 61, + 66, 72, 79, 86, 94, 102, 112, 122, + 133, 145, 158, 173, 189, 206, 225, 245, + 267, 292, 318, 348, 379, 414, 452, 493, + 538, 587, 640, 699, 763, 832, 908, 991, + 1081, 1180, 1288, 1405, 1534, 1673, 1826, 1993, + 2175, 2373, 2590, 2826, 3084, 3365, 3672, 4008, + 4373, 4772, 5208, 5683, 6202, 6767, 7385, 8059, + 8794, 9597, 10472, 11428, 12471, 13609, 14851, 16206, + 17685, 19298, 21060, 22981, 25078, 27367, 29864, 32589, + -29973, -26728, -23186, -19322, -15105, -10503, -5481, -1, + 1, 1, 5481, 10503, 15105, 19322, 23186, 26728, + 29973, -32589, -29864, -27367, -25078, -22981, -21060, -19298, + -17685, -16206, -14851, -13609, -12471, -11428, -10472, -9597, + -8794, -8059, -7385, -6767, -6202, -5683, -5208, -4772, + -4373, -4008, -3672, -3365, -3084, -2826, -2590, -2373, + -2175, -1993, -1826, -1673, -1534, -1405, -1288, -1180, + -1081, -991, -908, -832, -763, -699, -640, -587, + -538, -493, -452, -414, -379, -348, -318, -292, + -267, -245, -225, -206, -189, -173, -158, -145, + -133, -122, -112, -102, -94, -86, -79, -72, + -66, -61, -56, -51, -47, -43, -42, -41, + -40, -39, -38, -37, -36, -35, -34, -33, + -32, -31, -30, -29, -28, -27, -26, -25, + -24, -23, -22, -21, -20, -19, -18, -17, + -16, -15, -14, -13, -12, -11, -10, -9, + -8, -7, -6, -5, -4, -3, -2, -1 + +}; + +static int sol_table_old[16] = + { 0x0, 0x1, 0x2 , 0x3, 0x6, 0xA, 0xF, 0x15, + -0x15, 
-0xF, -0xA, -0x6, -0x3, -0x2, -0x1, 0x0}; + +static int sol_table_new[16] = + { 0x0, 0x1, 0x2, 0x3, 0x6, 0xA, 0xF, 0x15, + 0x0, -0x1, -0x2, -0x3, -0x6, -0xA, -0xF, -0x15}; + +static int sol_table_16[128] = { + 0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080, + 0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120, + 0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0, + 0x1D0, 0x1E0, 0x1F0, 0x200, 0x208, 0x210, 0x218, 0x220, 0x228, 0x230, + 0x238, 0x240, 0x248, 0x250, 0x258, 0x260, 0x268, 0x270, 0x278, 0x280, + 0x288, 0x290, 0x298, 0x2A0, 0x2A8, 0x2B0, 0x2B8, 0x2C0, 0x2C8, 0x2D0, + 0x2D8, 0x2E0, 0x2E8, 0x2F0, 0x2F8, 0x300, 0x308, 0x310, 0x318, 0x320, + 0x328, 0x330, 0x338, 0x340, 0x348, 0x350, 0x358, 0x360, 0x368, 0x370, + 0x378, 0x380, 0x388, 0x390, 0x398, 0x3A0, 0x3A8, 0x3B0, 0x3B8, 0x3C0, + 0x3C8, 0x3D0, 0x3D8, 0x3E0, 0x3E8, 0x3F0, 0x3F8, 0x400, 0x440, 0x480, + 0x4C0, 0x500, 0x540, 0x580, 0x5C0, 0x600, 0x640, 0x680, 0x6C0, 0x700, + 0x740, 0x780, 0x7C0, 0x800, 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00, + 0xF00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x3000, 0x4000 +}; + + + +static int dpcm_decode_init(AVCodecContext *avctx) +{ + DPCMContext *s = avctx->priv_data; + int i; + short square; + + s->channels = avctx->channels; + s->sample[0] = s->sample[1] = 0; + + switch(avctx->codec->id) { + + case CODEC_ID_ROQ_DPCM: + /* initialize square table */ + for (i = 0; i < 128; i++) { + square = i * i; + s->roq_square_array[i] = square; + s->roq_square_array[i + 128] = -square; + } + break; + + + case CODEC_ID_SOL_DPCM: + switch(avctx->codec_tag){ + case 1: + s->sol_table=sol_table_old; + s->sample[0] = s->sample[1] = 0x80; + break; + case 2: + s->sol_table=sol_table_new; + s->sample[0] = s->sample[1] = 0x80; + break; + case 3: + s->sol_table=sol_table_16; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unknown SOL subcodec\n"); + return -1; + } + break; + + default: + break; + } + + return 0; +} + +static int 
dpcm_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + DPCMContext *s = avctx->priv_data; + int in, out = 0; + int predictor[2]; + int channel_number = 0; + short *output_samples = data; + int shift[2]; + unsigned char byte; + short diff; + + if (!buf_size) + return 0; + + switch(avctx->codec->id) { + + case CODEC_ID_ROQ_DPCM: + if (s->channels == 1) + predictor[0] = LE_16(&buf[6]); + else { + predictor[0] = buf[7] << 8; + predictor[1] = buf[6] << 8; + } + SE_16BIT(predictor[0]); + SE_16BIT(predictor[1]); + + /* decode the samples */ + for (in = 8, out = 0; in < buf_size; in++, out++) { + predictor[channel_number] += s->roq_square_array[buf[in]]; + SATURATE_S16(predictor[channel_number]); + output_samples[out] = predictor[channel_number]; + + /* toggle channel */ + channel_number ^= s->channels - 1; + } + break; + + case CODEC_ID_INTERPLAY_DPCM: + in = 6; /* skip over the stream mask and stream length */ + predictor[0] = LE_16(&buf[in]); + in += 2; + SE_16BIT(predictor[0]) + output_samples[out++] = predictor[0]; + if (s->channels == 2) { + predictor[1] = LE_16(&buf[in]); + in += 2; + SE_16BIT(predictor[1]) + output_samples[out++] = predictor[1]; + } + + while (in < buf_size) { + predictor[channel_number] += interplay_delta_table[buf[in++]]; + SATURATE_S16(predictor[channel_number]); + output_samples[out++] = predictor[channel_number]; + + /* toggle channel */ + channel_number ^= s->channels - 1; + } + + break; + + case CODEC_ID_XAN_DPCM: + in = 0; + shift[0] = shift[1] = 4; + predictor[0] = LE_16(&buf[in]); + in += 2; + SE_16BIT(predictor[0]); + if (s->channels == 2) { + predictor[1] = LE_16(&buf[in]); + in += 2; + SE_16BIT(predictor[1]); + } + + while (in < buf_size) { + byte = buf[in++]; + diff = (byte & 0xFC) << 8; + if ((byte & 0x03) == 3) + shift[channel_number]++; + else + shift[channel_number] -= (2 * (byte & 3)); + /* saturate the shifter to a lower limit of 0 */ + if (shift[channel_number] < 0) + 
shift[channel_number] = 0; + + diff >>= shift[channel_number]; + predictor[channel_number] += diff; + + SATURATE_S16(predictor[channel_number]); + output_samples[out++] = predictor[channel_number]; + + /* toggle channel */ + channel_number ^= s->channels - 1; + } + break; + case CODEC_ID_SOL_DPCM: + in = 0; + if (avctx->codec_tag != 3) { + while (in < buf_size) { + int n1, n2; + n1 = (buf[in] >> 4) & 0xF; + n2 = buf[in++] & 0xF; + s->sample[0] += s->sol_table[n1]; + if (s->sample[0] < 0) s->sample[0] = 0; + if (s->sample[0] > 255) s->sample[0] = 255; + output_samples[out++] = (s->sample[0] - 128) << 8; + s->sample[s->channels - 1] += s->sol_table[n2]; + if (s->sample[s->channels - 1] < 0) s->sample[s->channels - 1] = 0; + if (s->sample[s->channels - 1] > 255) s->sample[s->channels - 1] = 255; + output_samples[out++] = (s->sample[s->channels - 1] - 128) << 8; + } + } else { + while (in < buf_size) { + int n; + n = buf[in++]; + if (n & 0x80) s->sample[channel_number] -= s->sol_table[n & 0x7F]; + else s->sample[channel_number] += s->sol_table[n & 0x7F]; + SATURATE_S16(s->sample[channel_number]); + output_samples[out++] = s->sample[channel_number]; + /* toggle channel */ + channel_number ^= s->channels - 1; + } + } + break; + } + + *data_size = out * sizeof(short); + return buf_size; +} + +AVCodec roq_dpcm_decoder = { + "roq_dpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_ROQ_DPCM, + sizeof(DPCMContext), + dpcm_decode_init, + NULL, + NULL, + dpcm_decode_frame, +}; + +AVCodec interplay_dpcm_decoder = { + "interplay_dpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_INTERPLAY_DPCM, + sizeof(DPCMContext), + dpcm_decode_init, + NULL, + NULL, + dpcm_decode_frame, +}; + +AVCodec xan_dpcm_decoder = { + "xan_dpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_XAN_DPCM, + sizeof(DPCMContext), + dpcm_decode_init, + NULL, + NULL, + dpcm_decode_frame, +}; + +AVCodec sol_dpcm_decoder = { + "sol_dpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_SOL_DPCM, + sizeof(DPCMContext), + dpcm_decode_init, + NULL, + NULL, + 
dpcm_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/dsputil.c b/mpeg4/src/libavcodec/dsputil.c new file mode 100644 index 0000000000000000000000000000000000000000..ad1bfd482f58c6ea5ac80569a3911766b3f3a315 --- /dev/null +++ b/mpeg4/src/libavcodec/dsputil.c @@ -0,0 +1,4116 @@ +/* + * DSP utils + * Copyright (c) 2000, 2001 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer + */ + +/** + * @file dsputil.c + * DSP utils + */ + +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" +#include "simple_idct.h" +#include "faandct.h" +#include "snow.h" + +/* snow.c */ +void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); + +uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; +uint32_t squareTbl[512] = {0, }; + +const uint8_t ff_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +/* Specific zigzag scan for 248 idct. 
NOTE that unlike the + specification, we interleave the fields */ +const uint8_t ff_zigzag248_direct[64] = { + 0, 8, 1, 9, 16, 24, 2, 10, + 17, 25, 32, 40, 48, 56, 33, 41, + 18, 26, 3, 11, 4, 12, 19, 27, + 34, 42, 49, 57, 50, 58, 35, 43, + 20, 28, 5, 13, 6, 14, 21, 29, + 36, 44, 51, 59, 52, 60, 37, 45, + 22, 30, 7, 15, 23, 31, 38, 46, + 53, 61, 54, 62, 39, 47, 55, 63, +}; + +/* not permutated inverse zigzag_direct + 1 for MMX quantizer */ +DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, }; + +const uint8_t ff_alternate_horizontal_scan[64] = { + 0, 1, 2, 3, 8, 9, 16, 17, + 10, 11, 4, 5, 6, 7, 15, 14, + 13, 12, 19, 18, 24, 25, 32, 33, + 26, 27, 20, 21, 22, 23, 28, 29, + 30, 31, 34, 35, 40, 41, 48, 49, + 42, 43, 36, 37, 38, 39, 44, 45, + 46, 47, 50, 51, 56, 57, 58, 59, + 52, 53, 54, 55, 60, 61, 62, 63, +}; + +const uint8_t ff_alternate_vertical_scan[64] = { + 0, 8, 16, 24, 1, 9, 2, 10, + 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, + 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, + 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63, +}; + +/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ +const uint32_t inverse[256]={ + 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, + 536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, + 268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, + 178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, + 134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, + 107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, + 89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, + 76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, + 67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 
61356676, 60492498, + 59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, + 53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, + 48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, + 44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, + 41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, + 38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, + 35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, + 33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, + 31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, + 29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, + 28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, + 26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, + 25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, + 24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, + 23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, + 22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, + 21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, + 20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, + 19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, + 19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, + 18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, + 17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, + 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, +}; + +/* Input permutation for the simple_idct_mmx */ +static const uint8_t 
simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + +static int pix_sum_c(uint8_t * pix, int line_size) +{ + int s, i, j; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += pix[0]; + s += pix[1]; + s += pix[2]; + s += pix[3]; + s += pix[4]; + s += pix[5]; + s += pix[6]; + s += pix[7]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static int pix_norm1_c(uint8_t * pix, int line_size) +{ + int s, i, j; + uint32_t *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { +#if 0 + s += sq[pix[0]]; + s += sq[pix[1]]; + s += sq[pix[2]]; + s += sq[pix[3]]; + s += sq[pix[4]]; + s += sq[pix[5]]; + s += sq[pix[6]]; + s += sq[pix[7]]; +#else +#if LONG_MAX > 2147483647 + register uint64_t x=*(uint64_t*)pix; + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; + s += sq[(x>>32)&0xff]; + s += sq[(x>>40)&0xff]; + s += sq[(x>>48)&0xff]; + s += sq[(x>>56)&0xff]; +#else + register uint32_t x=*(uint32_t*)pix; + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; + x=*(uint32_t*)(pix+4); + s += sq[x&0xff]; + s += sq[(x>>8)&0xff]; + s += sq[(x>>16)&0xff]; + s += sq[(x>>24)&0xff]; +#endif +#endif + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ + int i; + + for(i=0; i+8<=w; i+=8){ + dst[i+0]= bswap_32(src[i+0]); + dst[i+1]= bswap_32(src[i+1]); + dst[i+2]= bswap_32(src[i+2]); + dst[i+3]= bswap_32(src[i+3]); + dst[i+4]= bswap_32(src[i+4]); + dst[i+5]= bswap_32(src[i+5]); + dst[i+6]= 
bswap_32(src[i+6]); + dst[i+7]= bswap_32(src[i+7]); + } + for(;i>1 : 0; + int size= 1<=0); + + return s>>2; +#endif +} + +static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 1); +} + +static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 0); +} + +static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 1); +} + +static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 0); +} + +static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) +{ + int i; + + /* read the pixels */ + for(i=0;i<8;i++) { + block[0] = pixels[0]; + block[1] = pixels[1]; + block[2] = pixels[2]; + block[3] = pixels[3]; + block[4] = pixels[4]; + block[5] = pixels[5]; + block[6] = pixels[6]; + block[7] = pixels[7]; + pixels += line_size; + block += 8; + } +} + +static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1, + const uint8_t *s2, int stride){ + int i; + + /* read the pixels */ + for(i=0;i<8;i++) { + block[0] = s1[0] - s2[0]; + block[1] = s1[1] - s2[1]; + block[2] = s1[2] - s2[2]; + block[3] = s1[3] - s2[3]; + block[4] = s1[4] - s2[4]; + block[5] = s1[5] - s2[5]; + block[6] = s1[6] - s2[6]; + block[7] = s1[7] - s2[7]; + s1 += stride; + s2 += stride; + block += 8; + } +} + + +static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<8;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + pixels[2] = cm[block[2]]; + pixels[3] = cm[block[3]]; + pixels[4] = cm[block[4]]; + pixels[5] = cm[block[5]]; + pixels[6] = cm[block[6]]; + pixels[7] = cm[block[7]]; + + pixels += line_size; + block += 8; + } +} + +static void 
put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + pixels[2] = cm[block[2]]; + pixels[3] = cm[block[3]]; + + pixels += line_size; + block += 8; + } +} + +static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + + pixels += line_size; + block += 8; + } +} + +static void put_signed_pixels_clamped_c(const DCTELEM *block, + uint8_t *restrict pixels, + int line_size) +{ + int i, j; + + for (i = 0; i < 8; i++) { + for (j = 0; j < 8; j++) { + if (*block < -128) + *pixels = 0; + else if (*block > 127) + *pixels = 255; + else + *pixels = (uint8_t)(*block + 128); + block++; + pixels++; + } + pixels += (line_size - 8); + } +} + +static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<8;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels[2] = cm[pixels[2] + block[2]]; + pixels[3] = cm[pixels[3] + block[3]]; + pixels[4] = cm[pixels[4] + block[4]]; + pixels[5] = cm[pixels[5] + block[5]]; + pixels[6] = cm[pixels[6] + block[6]]; + pixels[7] = cm[pixels[7] + block[7]]; + pixels += line_size; + block += 8; + } +} + +static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels[2] = cm[pixels[2] + block[2]]; + pixels[3] = cm[pixels[3] + block[3]]; + pixels += line_size; + block += 8; + } +} + +static void 
add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels += line_size; + block += 8; + } +} + +static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) +{ + int i; + for(i=0;i<8;i++) { + pixels[0] += block[0]; + pixels[1] += block[1]; + pixels[2] += block[2]; + pixels[3] += block[3]; + pixels[4] += block[4]; + pixels[5] += block[5]; + pixels[6] += block[6]; + pixels[7] += block[7]; + pixels += line_size; + block += 8; + } +} + +static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size) +{ + int i; + for(i=0;i<4;i++) { + pixels[0] += block[0]; + pixels[1] += block[1]; + pixels[2] += block[2]; + pixels[3] += block[3]; + pixels += line_size; + block += 4; + } +} + +#if 0 + +#define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + for(i=0; i>1));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 
0x0202020202020202ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD64(pixels );\ + b= LD64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0202020202020202ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint64_t a= LD64(pixels );\ + const uint64_t b= LD64(pixels+1);\ + uint64_t l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + uint64_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD64(pixels );\ + b= LD64(pixels+1);\ + l0= (a&0x0303030303030303ULL)\ + + (b&0x0303030303030303ULL)\ + + 0x0101010101010101ULL;\ + h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ + + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ + OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## 
_no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8) + +#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) +#else // 64 bit variant + +#define PIXOP2(OPNAME, OP) \ +static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + int i;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= LD32(&src1[i*src_stride1+4]);\ + b= LD32(&src2[i*src_stride2+4]);\ + c= LD32(&src3[i*src_stride3+4]);\ + d= LD32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, 
uint8_t *src3, uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + int i;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + a= LD32(&src1[i*src_stride1+4]);\ + b= LD32(&src2[i*src_stride2+4]);\ + c= LD32(&src3[i*src_stride3+4]);\ + d= LD32(&src4[i*src_stride4+4]);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + l1= (c&0x03030303UL)\ + + (d&0x03030303UL);\ + h1= ((c&0xFCFCFCFCUL)>>2)\ + + ((d&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + }\ +}\ +static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\ + int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ + OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ + OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ +}\ +\ +static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i, a0, b0, a1, b1;\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += 
pixels[2];\ +\ + pixels+=line_size;\ + for(i=0; i>2; /* FIXME non put */\ + block[1]= (b1+b0)>>2;\ +\ + pixels+=line_size;\ + block +=line_size;\ +\ + a0= pixels[0];\ + b0= pixels[1] + 2;\ + a0 += b0;\ + b0 += pixels[2];\ +\ + block[0]= (a1+a0)>>2;\ + block[1]= (b1+b0)>>2;\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ +}\ +\ +static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x02020202UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + 
pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ +{\ + int j;\ + for(j=0; j<2; j++){\ + int i;\ + const uint32_t a= LD32(pixels );\ + const uint32_t b= LD32(pixels+1);\ + uint32_t l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + uint32_t l1,h1;\ +\ + pixels+=line_size;\ + for(i=0; i>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + a= LD32(pixels );\ + b= LD32(pixels+1);\ + l0= (a&0x03030303UL)\ + + (b&0x03030303UL)\ + + 0x01010101UL;\ + h0= ((a&0xFCFCFCFCUL)>>2)\ + + ((b&0xFCFCFCFCUL)>>2);\ + OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ + pixels+=line_size;\ + block +=line_size;\ + }\ + pixels+=4-line_size*(h+1);\ + block +=4-line_size*h;\ + }\ +}\ +\ +CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\ +CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\ + +#define op_avg(a, b) a = rnd_avg32(a, b) +#endif +#define op_put(a, b) a = b + +PIXOP2(avg, op_avg) +PIXOP2(put, op_put) +#undef op_avg +#undef op_put + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, 
h); +} + +static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ + put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h); +} + +static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder) +{ + const int A=(16-x16)*(16-y16); + const int B=( x16)*(16-y16); + const int C=(16-x16)*( y16); + const int D=( x16)*( y16); + int i; + + for(i=0; i>8; + dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; + dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8; + dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8; + dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8; + dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8; + dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; + dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; + dst+= stride; + src+= stride; + } +} + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) +{ + int y, vx, vy; + const int s= 1<>16; + src_y= vy>>16; + frac_x= src_x&(s-1); + frac_y= src_y&(s-1); + src_x>>=shift; + src_y>>=shift; + + if((unsigned)src_x < width){ + if((unsigned)src_y < height){ + index= src_x + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*(s-frac_y) + + ( src[index+stride ]*(s-frac_x) + + src[index+stride+1]* frac_x )* frac_y + + r)>>(shift*2); + }else{ + index= src_x + clip(src_y, 0, height)*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_x) + + src[index +1]* frac_x )*s + + r)>>(shift*2); + } + }else{ + if((unsigned)src_y < height){ + index= clip(src_x, 0, width) + src_y*stride; + dst[y*stride + x]= ( ( src[index ]*(s-frac_y) + + src[index+stride ]* frac_y )*s + + r)>>(shift*2); + }else{ + index= 
clip(src_x, 0, width) + clip(src_y, 0, height)*stride; + dst[y*stride + x]= src[index ]; + } + } + + vx+= dxx; + vy+= dyx; + } + ox += dxy; + oy += dyy; + } +} + +static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + switch(width){ + case 2: put_pixels2_c (dst, src, stride, height); break; + case 4: put_pixels4_c (dst, src, stride, height); break; + case 8: put_pixels8_c (dst, src, stride, height); break; + case 16:put_pixels16_c(dst, src, stride, height); break; + } +} + +static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; + } + src += 
stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + switch(width){ + case 2: avg_pixels2_c (dst, src, stride, height); break; + case 4: avg_pixels4_c (dst, src, stride, height); break; + case 8: avg_pixels8_c (dst, src, stride, height); break; + case 16:avg_pixels16_c(dst, src, stride, height); break; + } +} + +static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void 
avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} + +static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ + int i,j; + for (i=0; i < height; i++) { + for (j=0; j < width; j++) { + dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1; + } + src += stride; + dst += stride; + } +} +#if 0 +#define 
TPEL_WIDTH(width)\ +static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\ +static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\ + void put_tpel_pixels_mc22_c(dst, src, stride, width, height);} +#endif + +#define H264_CHROMA_MC(OPNAME, OP)\ +static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\ + const int A=(8-x)*(8-y);\ + const int B=( x)*(8-y);\ + const int C=(8-x)*( y);\ + const int D=( x)*( y);\ + int i;\ + \ + assert(x<8 && y<8 && x>=0 && y>=0);\ +\ + for(i=0; i=0 && y>=0);\ +\ + for(i=0; i=0 && y>=0);\ +\ + for(i=0; 
i>6)+1)>>1) +#define op_put(a, b) a = (((b) + 32)>>6) + +H264_CHROMA_MC(put_ , op_put) +H264_CHROMA_MC(avg_ , op_avg) +#undef op_avg +#undef op_put + +static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i>5]+1)>>1) +#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) +#define op_put(a, b) a = cm[((b) + 16)>>5] +#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5] + +QPEL_MC(0, put_ , _ , op_put) +QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd) +QPEL_MC(0, avg_ , _ , op_avg) +//QPEL_MC(1, avg_no_rnd , _ , op_avg) +#undef op_avg +#undef op_avg_no_rnd +#undef op_put +#undef op_put_no_rnd + +#if 1 +#define H264_LOWPASS(OPNAME, OP, OP2) \ +static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + const int h=2;\ + uint8_t *cm = cropTbl + MAX_NEG_CROP;\ + int i;\ + for(i=0; i>5]+1)>>1) +//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) +#define op_put(a, b) a = cm[((b) + 16)>>5] +#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) +#define op2_put(a, b) a = cm[((b) + 512)>>10] + +H264_LOWPASS(put_ , op_put, op2_put) +H264_LOWPASS(avg_ , op_avg, op2_avg) +H264_MC(put_, 2) +H264_MC(put_, 4) +H264_MC(put_, 8) +H264_MC(put_, 16) +H264_MC(avg_, 4) +H264_MC(avg_, 8) +H264_MC(avg_, 16) + +#undef op_avg +#undef op_put +#undef op2_avg +#undef op2_put +#endif + +#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom ) +#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) +#define H264_WEIGHT(W,H) \ +static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ + int y; \ + offset <<= log2_denom; \ + if(log2_denom) offset += 1<<(log2_denom-1); \ + for(y=0; y>4]; + dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; + dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; + 
dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; + dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; + dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; + dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; + dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; + dst+=dstStride; + src+=srcStride; + } +} + +static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int i; + + for(i=0; i>4]; + dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; + dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; + dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; + dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; + dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; + dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; + dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; + src++; + dst++; + } +} + +static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){ + put_pixels8_c(dst, src, stride, 8); +} + +static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t half[64]; + wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); + put_pixels8_l2(dst, src, half, stride, stride, 8, 8); +} + +static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ + wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); +} + +static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t half[64]; + wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); + put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); +} + +static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ + wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); +} + +static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + uint8_t halfV[64]; + uint8_t halfHV[64]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, 
stride, 11); + wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); + wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); + put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); +} +static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + uint8_t halfV[64]; + uint8_t halfHV[64]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); + wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); + wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); + put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); +} +static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); + wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); +} + +static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){ + int x; + const int strength= ff_h263_loop_filter_strength[qscale]; + + for(x=0; x<8; x++){ + int d1, d2, ad1; + int p0= src[x-2*stride]; + int p1= src[x-1*stride]; + int p2= src[x+0*stride]; + int p3= src[x+1*stride]; + int d = (p0 - p3 + 4*(p2 - p1)) / 8; + + if (d<-2*strength) d1= 0; + else if(d<- strength) d1=-2*strength - d; + else if(d< strength) d1= d; + else if(d< 2*strength) d1= 2*strength - d; + else d1= 0; + + p1 += d1; + p2 -= d1; + if(p1&256) p1= ~(p1>>31); + if(p2&256) p2= ~(p2>>31); + + src[x-1*stride] = p1; + src[x+0*stride] = p2; + + ad1= ABS(d1)>>1; + + d2= clip((p0-p3)/4, -ad1, ad1); + + src[x-2*stride] = p0 - d2; + src[x+ stride] = p3 + d2; + } +} + +static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ + int y; + const int strength= ff_h263_loop_filter_strength[qscale]; + + for(y=0; y<8; y++){ + int d1, d2, ad1; + int p0= src[y*stride-2]; + int p1= src[y*stride-1]; + int p2= src[y*stride+0]; + int p3= src[y*stride+1]; + int d = (p0 - p3 + 4*(p2 - p1)) / 8; + + if (d<-2*strength) d1= 0; + else if(d<- strength) d1=-2*strength - d; + else if(d< strength) d1= d; + else if(d< 2*strength) d1= 2*strength - d; + else d1= 0; + + p1 += 
d1; + p2 -= d1; + if(p1&256) p1= ~(p1>>31); + if(p2&256) p2= ~(p2>>31); + + src[y*stride-1] = p1; + src[y*stride+0] = p2; + + ad1= ABS(d1)>>1; + + d2= clip((p0-p3)/4, -ad1, ad1); + + src[y*stride-2] = p0 - d2; + src[y*stride+1] = p3 + d2; + } +} + +static void h261_loop_filter_c(uint8_t *src, int stride){ + int x,y,xy,yz; + int temp[64]; + + for(x=0; x<8; x++){ + temp[x ] = 4*src[x ]; + temp[x + 7*8] = 4*src[x + 7*stride]; + } + for(y=1; y<7; y++){ + for(x=0; x<8; x++){ + xy = y * stride + x; + yz = y * 8 + x; + temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; + } + } + + for(y=0; y<8; y++){ + src[ y*stride] = (temp[ y*8] + 2)>>2; + src[7+y*stride] = (temp[7+y*8] + 2)>>2; + for(x=1; x<7; x++){ + xy = y * stride + x; + yz = y * 8 + x; + src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; + } + } +} + +static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) +{ + int i, d; + for( i = 0; i < 4; i++ ) { + if( tc0[i] < 0 ) { + pix += 4*ystride; + continue; + } + for( d = 0; d < 4; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int p2 = pix[-3*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + const int q2 = pix[2*xstride]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + int tc = tc0[i]; + int i_delta; + + if( ABS( p2 - p0 ) < beta ) { + pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] ); + tc++; + } + if( ABS( q2 - q0 ) < beta ) { + pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] ); + tc++; + } + + i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-xstride] = clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + 
h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0); +} +static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0); +} + +static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0) +{ + int i, d; + for( i = 0; i < 4; i++ ) { + const int tc = tc0[i]; + if( tc <= 0 ) { + pix += 2*ystride; + continue; + } + for( d = 0; d < 2; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + int delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + + pix[-xstride] = clip_uint8( p0 + delta ); /* p0' */ + pix[0] = clip_uint8( q0 - delta ); /* q0' */ + } + pix += ystride; + } + } +} +static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0); +} +static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0); +} + +static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta) +{ + int d; + for( d = 0; d < 8; d++ ) { + const int p0 = pix[-1*xstride]; + const int p1 = pix[-2*xstride]; + const int q0 = pix[0]; + const int q1 = pix[1*xstride]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + } + pix += ystride; + } +} +static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta); +} +static void h264_h_loop_filter_chroma_intra_c(uint8_t 
*pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta); +} + +static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) +{ + int s, i; + + s = 0; + for(i=0;iavctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){ + MpegEncContext *c = v; + int score1=0; + int score2=0; + int x,y; + + for(y=0; yavctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ + int i; + unsigned int sum=0; + + for(i=0; i<8*8; i++){ + int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT)); + int w= weight[i]; + b>>= RECON_SHIFT; + assert(-512>4; + } + return sum>>2; +} + +static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){ + int i; + + for(i=0; i<8*8; i++){ + rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT); + } +} + +/** + * permutes an 8x8 block. + * @param block the block which will be permuted according to the given permutation vector + * @param permutation the permutation vector + * @param last the last non zero coefficient in scantable order, used to speed the permutation up + * @param scantable the used scantable, this is only used to speed the permutation up, the block is not + * (inverse) permutated to scantable order! 
+ */ +void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last) +{ + int i; + DCTELEM temp[64]; + + if(last<=0) return; + //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + temp[j]= block[j]; + block[j]=0; + } + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + const int perm_j= permutation[j]; + block[perm_j]= temp[j]; + } +} + +static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ + return 0; +} + +void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ + int i; + + memset(cmp, 0, sizeof(void*)*5); + + for(i=0; i<5; i++){ + switch(type&0xFF){ + case FF_CMP_SAD: + cmp[i]= c->sad[i]; + break; + case FF_CMP_SATD: + cmp[i]= c->hadamard8_diff[i]; + break; + case FF_CMP_SSE: + cmp[i]= c->sse[i]; + break; + case FF_CMP_DCT: + cmp[i]= c->dct_sad[i]; + break; + case FF_CMP_DCT264: + cmp[i]= c->dct264_sad[i]; + break; + case FF_CMP_DCTMAX: + cmp[i]= c->dct_max[i]; + break; + case FF_CMP_PSNR: + cmp[i]= c->quant_psnr[i]; + break; + case FF_CMP_BIT: + cmp[i]= c->bit[i]; + break; + case FF_CMP_RD: + cmp[i]= c->rd[i]; + break; + case FF_CMP_VSAD: + cmp[i]= c->vsad[i]; + break; + case FF_CMP_VSSE: + cmp[i]= c->vsse[i]; + break; + case FF_CMP_ZERO: + cmp[i]= zero_cmp; + break; + case FF_CMP_NSSE: + cmp[i]= c->nsse[i]; + break; + case FF_CMP_W53: + cmp[i]= c->w53[i]; + break; + case FF_CMP_W97: + cmp[i]= c->w97[i]; + break; + default: + av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); + } + } +} + +/** + * memset(blocks, 0, sizeof(DCTELEM)*6*64) + */ +static void clear_blocks_c(DCTELEM *blocks) +{ + memset(blocks, 0, sizeof(DCTELEM)*6*64); +} + +static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){ + int i; + for(i=0; i+7maxi){ + maxi=sum; + printf("MAX:%d\n", maxi); +} +#endif + return sum; +} + +static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, 
int stride, int h){ + int i; + int temp[64]; + int sum=0; + + assert(h==8); + + for(i=0; i<8; i++){ + //FIXME try pointer walks + BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]); + BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]); + BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]); + BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]); + + BUTTERFLY1(temp[8*i+0], temp[8*i+2]); + BUTTERFLY1(temp[8*i+1], temp[8*i+3]); + BUTTERFLY1(temp[8*i+4], temp[8*i+6]); + BUTTERFLY1(temp[8*i+5], temp[8*i+7]); + + BUTTERFLY1(temp[8*i+0], temp[8*i+4]); + BUTTERFLY1(temp[8*i+1], temp[8*i+5]); + BUTTERFLY1(temp[8*i+2], temp[8*i+6]); + BUTTERFLY1(temp[8*i+3], temp[8*i+7]); + } + + for(i=0; i<8; i++){ + BUTTERFLY1(temp[8*0+i], temp[8*1+i]); + BUTTERFLY1(temp[8*2+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*5+i]); + BUTTERFLY1(temp[8*6+i], temp[8*7+i]); + + BUTTERFLY1(temp[8*0+i], temp[8*2+i]); + BUTTERFLY1(temp[8*1+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*6+i]); + BUTTERFLY1(temp[8*5+i], temp[8*7+i]); + + sum += + BUTTERFLYA(temp[8*0+i], temp[8*4+i]) + +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) + +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) + +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); + } + + sum -= ABS(temp[8*0] + temp[8*4]); // -mean + + return sum; +} + +static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); + DCTELEM * const temp= (DCTELEM*)aligned_temp; + int sum=0, i; + + assert(h==8); + + s->dsp.diff_pixels(temp, src1, src2, stride); + s->dsp.fdct(temp); + + for(i=0; i<64; i++) + sum+= ABS(temp[i]); + + return sum; +} + +#ifdef CONFIG_GPL +#define DCT8_1D {\ + const int s07 = SRC(0) + SRC(7);\ + const int s16 = SRC(1) + SRC(6);\ + const int s25 = SRC(2) + SRC(5);\ + const int s34 = SRC(3) + SRC(4);\ + const int a0 = s07 + s34;\ + const 
int a1 = s16 + s25;\ + const int a2 = s07 - s34;\ + const int a3 = s16 - s25;\ + const int d07 = SRC(0) - SRC(7);\ + const int d16 = SRC(1) - SRC(6);\ + const int d25 = SRC(2) - SRC(5);\ + const int d34 = SRC(3) - SRC(4);\ + const int a4 = d16 + d25 + (d07 + (d07>>1));\ + const int a5 = d07 - d34 - (d25 + (d25>>1));\ + const int a6 = d07 + d34 - (d16 + (d16>>1));\ + const int a7 = d16 - d25 + (d34 + (d34>>1));\ + DST(0, a0 + a1 ) ;\ + DST(1, a4 + (a7>>2)) ;\ + DST(2, a2 + (a3>>1)) ;\ + DST(3, a5 + (a6>>2)) ;\ + DST(4, a0 - a1 ) ;\ + DST(5, a6 - (a5>>2)) ;\ + DST(6, (a2>>1) - a3 ) ;\ + DST(7, (a4>>2) - a7 ) ;\ +} + +static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + int16_t dct[8][8]; + int i; + int sum=0; + + s->dsp.diff_pixels(dct, src1, src2, stride); + +#define SRC(x) dct[i][x] +#define DST(x,v) dct[i][x]= v + for( i = 0; i < 8; i++ ) + DCT8_1D +#undef SRC +#undef DST + +#define SRC(x) dct[x][i] +#define DST(x,v) sum += ABS(v) + for( i = 0; i < 8; i++ ) + DCT8_1D +#undef SRC +#undef DST + return sum; +} +#endif + +static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); + DCTELEM * const temp= (DCTELEM*)aligned_temp; + int sum=0, i; + + assert(h==8); + + s->dsp.diff_pixels(temp, src1, src2, stride); + s->dsp.fdct(temp); + + for(i=0; i<64; i++) + sum= FFMAX(sum, ABS(temp[i])); + + return sum; +} + +void simple_idct(DCTELEM *block); //FIXME + +static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]); + DCTELEM * const temp= (DCTELEM*)aligned_temp; + DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64; + int sum=0, i; + + assert(h==8); + 
s->mb_intra=0; + + s->dsp.diff_pixels(temp, src1, src2, stride); + + memcpy(bak, temp, 64*sizeof(DCTELEM)); + + s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + s->dct_unquantize_inter(s, temp, 0, s->qscale); + simple_idct(temp); //FIXME + + for(i=0; i<64; i++) + sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); + + return sum; +} + +static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + const uint8_t *scantable= s->intra_scantable.permutated; + DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); + DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]); + DCTELEM * const temp= (DCTELEM*)aligned_temp; + uint8_t * const bak= (uint8_t*)aligned_bak; + int i, last, run, bits, level, distoration, start_i; + const int esc_length= s->ac_esc_length; + uint8_t * length; + uint8_t * last_length; + + assert(h==8); + + for(i=0; i<8; i++){ + ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; + ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; + } + + s->dsp.diff_pixels(temp, src1, src2, stride); + + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; + + if (s->mb_intra) { + start_i = 1; + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma + } else { + start_i = 0; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + + if(last>=start_i){ + run=0; + for(i=start_i; i=0){ + if(s->mb_intra) + s->dct_unquantize_intra(s, temp, 0, s->qscale); + else + s->dct_unquantize_inter(s, temp, 0, s->qscale); + } + + s->dsp.idct_add(bak, stride, temp); + + distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8); + + return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); +} + +static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t 
*src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + const uint8_t *scantable= s->intra_scantable.permutated; + DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]); + DCTELEM * const temp= (DCTELEM*)aligned_temp; + int i, last, run, bits, level, start_i; + const int esc_length= s->ac_esc_length; + uint8_t * length; + uint8_t * last_length; + + assert(h==8); + + s->dsp.diff_pixels(temp, src1, src2, stride); + + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; + + if (s->mb_intra) { + start_i = 1; + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma + } else { + start_i = 0; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + + if(last>=start_i){ + run=0; + for(i=start_i; i>3]; +} +static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; +} + +static void just_return() { return; } + +/* init static data */ +void dsputil_static_init(void) +{ + int i; + + for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; + for(i=0;idct_algo==FF_DCT_FASTINT) { + c->fdct = fdct_ifast; + c->fdct248 = fdct_ifast248; + } + else if(avctx->dct_algo==FF_DCT_FAAN) { + c->fdct = ff_faandct; + c->fdct248 = ff_faandct248; + } + else { + c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default + c->fdct248 = ff_fdct248_islow; + } +#endif //CONFIG_ENCODERS + + if(avctx->lowres==1){ + if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ + c->idct_put= ff_jref_idct4_put; + c->idct_add= ff_jref_idct4_add; + }else{ + c->idct_put= ff_h264_lowres_idct_put_c; + c->idct_add= ff_h264_lowres_idct_add_c; + } + c->idct = j_rev_dct4; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==2){ + c->idct_put= ff_jref_idct2_put; + c->idct_add= ff_jref_idct2_add; + 
c->idct = j_rev_dct2; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==3){ + c->idct_put= ff_jref_idct1_put; + c->idct_add= ff_jref_idct1_add; + c->idct = j_rev_dct1; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else{ + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct = j_rev_dct; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else if(avctx->idct_algo==FF_IDCT_VP3){ + c->idct_put= ff_vp3_idct_put_c; + c->idct_add= ff_vp3_idct_add_c; + c->idct = ff_vp3_idct_c; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct = simple_idct; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } + } + + c->h264_idct_add= ff_h264_idct_add_c; + c->h264_idct8_add= ff_h264_idct8_add_c; + c->h264_idct_dc_add= ff_h264_idct_dc_add_c; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; + + c->get_pixels = get_pixels_c; + c->diff_pixels = diff_pixels_c; + c->put_pixels_clamped = put_pixels_clamped_c; + c->put_signed_pixels_clamped = put_signed_pixels_clamped_c; + c->add_pixels_clamped = add_pixels_clamped_c; + c->add_pixels8 = add_pixels8_c; + c->add_pixels4 = add_pixels4_c; + c->gmc1 = gmc1_c; + c->gmc = ff_gmc_c; + c->clear_blocks = clear_blocks_c; + c->pix_sum = pix_sum_c; + c->pix_norm1 = pix_norm1_c; + + /* TODO [0] 16 [1] 8 */ + c->pix_abs[0][0] = pix_abs16_c; + c->pix_abs[0][1] = pix_abs16_x2_c; + c->pix_abs[0][2] = pix_abs16_y2_c; + c->pix_abs[0][3] = pix_abs16_xy2_c; + c->pix_abs[1][0] = pix_abs8_c; + c->pix_abs[1][1] = pix_abs8_x2_c; + c->pix_abs[1][2] = pix_abs8_y2_c; + c->pix_abs[1][3] = pix_abs8_xy2_c; + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ + c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ + c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ + c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c + 
+ dspfunc(put, 0, 16); + dspfunc(put_no_rnd, 0, 16); + dspfunc(put, 1, 8); + dspfunc(put_no_rnd, 1, 8); + dspfunc(put, 2, 4); + dspfunc(put, 3, 2); + + dspfunc(avg, 0, 16); + dspfunc(avg_no_rnd, 0, 16); + dspfunc(avg, 1, 8); + dspfunc(avg_no_rnd, 1, 8); + dspfunc(avg, 2, 4); + dspfunc(avg, 3, 2); +#undef dspfunc + + c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; + c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; + + c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; + c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; + c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; + c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; + c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; + c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; + c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; + c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; + c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; + + c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; + c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; + c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; + c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; + c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; + c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; + c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; + c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; + c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \ + 
c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c + + dspfunc(put_qpel, 0, 16); + dspfunc(put_no_rnd_qpel, 0, 16); + + dspfunc(avg_qpel, 0, 16); + /* dspfunc(avg_no_rnd_qpel, 0, 16); */ + + dspfunc(put_qpel, 1, 8); + dspfunc(put_no_rnd_qpel, 1, 8); + + dspfunc(avg_qpel, 1, 8); + /* dspfunc(avg_no_rnd_qpel, 1, 8); */ + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + dspfunc(put_h264_qpel, 2, 4); + dspfunc(put_h264_qpel, 3, 2); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 2, 4); + +#undef dspfunc + c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; + c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; + c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; + c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; + c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; + + c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; + c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; + c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; + c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; + c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; + c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; + c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; + c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; + c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; + c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; + c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; + c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; + 
c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; + c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; + c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; + c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; + c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; + c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; + c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; + c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; + + c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; + c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; + c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; + c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; + c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; + c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; + c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; + c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; + +#define SET_CMP_FUNC(name) \ + c->name[0]= name ## 16_c;\ + c->name[1]= name ## 8x8_c; + + SET_CMP_FUNC(hadamard8_diff) + c->hadamard8_diff[4]= hadamard8_intra16_c; + SET_CMP_FUNC(dct_sad) + SET_CMP_FUNC(dct_max) +#ifdef CONFIG_GPL + SET_CMP_FUNC(dct264_sad) +#endif + c->sad[0]= pix_abs16_c; + c->sad[1]= pix_abs8_c; + c->sse[0]= sse16_c; + c->sse[1]= sse8_c; + c->sse[2]= sse4_c; + SET_CMP_FUNC(quant_psnr) + SET_CMP_FUNC(rd) + SET_CMP_FUNC(bit) + c->vsad[0]= vsad16_c; + c->vsad[4]= vsad_intra16_c; + c->vsse[0]= vsse16_c; + c->vsse[4]= vsse_intra16_c; + c->nsse[0]= nsse16_c; + c->nsse[1]= nsse8_c; + c->w53[0]= w53_16_c; + c->w53[1]= w53_8_c; + c->w97[0]= w97_16_c; + c->w97[1]= w97_8_c; + + c->add_bytes= add_bytes_c; + c->diff_bytes= diff_bytes_c; + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; + c->bswap_buf= bswap_buf; + + c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; + c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; + c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; + c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; + 
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; + + c->h263_h_loop_filter= h263_h_loop_filter_c; + c->h263_v_loop_filter= h263_v_loop_filter_c; + + c->h261_loop_filter= h261_loop_filter_c; + + c->try_8x8basis= try_8x8basis_c; + c->add_8x8basis= add_8x8basis_c; + +#ifdef CONFIG_SNOW_ENCODER + c->vertical_compose97i = ff_snow_vertical_compose97i; + c->horizontal_compose97i = ff_snow_horizontal_compose97i; + c->inner_add_yblock = ff_snow_inner_add_yblock; +#endif + + c->shrink[0]= ff_img_copy_plane; + c->shrink[1]= ff_shrink22; + c->shrink[2]= ff_shrink44; + c->shrink[3]= ff_shrink88; + + c->prefetch= just_return; + +#ifdef HAVE_MMX + dsputil_init_mmx(c, avctx); +#endif +#ifdef ARCH_ARMV4L + dsputil_init_armv4l(c, avctx); +#endif +#ifdef HAVE_MLIB + dsputil_init_mlib(c, avctx); +#endif +#ifdef ARCH_SPARC + dsputil_init_vis(c,avctx); +#endif +#ifdef ARCH_ALPHA + dsputil_init_alpha(c, avctx); +#endif +#ifdef ARCH_POWERPC + dsputil_init_ppc(c, avctx); +#endif +#ifdef HAVE_MMI + dsputil_init_mmi(c, avctx); +#endif +#ifdef ARCH_SH4 + dsputil_init_sh4(c,avctx); +#endif + + switch(c->idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + case FF_PARTTRANS_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3); + break; + default: + av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n"); + } +} + diff --git a/mpeg4/src/libavcodec/dsputil.h b/mpeg4/src/libavcodec/dsputil.h new file mode 100644 index 
0000000000000000000000000000000000000000..44e6a9efc7418a2b1792acfc1a9be06488e6e910 --- /dev/null +++ b/mpeg4/src/libavcodec/dsputil.h @@ -0,0 +1,638 @@ +/* + * DSP utils + * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file dsputil.h + * DSP utils. 
+ * note, many functions in here may use MMX which trashes the FPU state, it is + * absolutely necessary to call emms_c() between dsp & float/double code + */ + +#ifndef DSPUTIL_H +#define DSPUTIL_H + +#include "common.h" +#include "avcodec.h" + + +//#define DEBUG +/* dct code */ +typedef short DCTELEM; +typedef int DWTELEM; + +void fdct_ifast (DCTELEM *data); +void fdct_ifast248 (DCTELEM *data); +void ff_jpeg_fdct_islow (DCTELEM *data); +void ff_fdct248_islow (DCTELEM *data); + +void j_rev_dct (DCTELEM *data); +void j_rev_dct4 (DCTELEM *data); +void j_rev_dct2 (DCTELEM *data); +void j_rev_dct1 (DCTELEM *data); + +void ff_fdct_mmx(DCTELEM *block); +void ff_fdct_mmx2(DCTELEM *block); +void ff_fdct_sse2(DCTELEM *block); + +void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); +void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); + +/* encoding scans */ +extern const uint8_t ff_alternate_horizontal_scan[64]; +extern const uint8_t ff_alternate_vertical_scan[64]; +extern const uint8_t ff_zigzag_direct[64]; +extern const uint8_t ff_zigzag248_direct[64]; + +/* pixel operations */ +#define MAX_NEG_CROP 1024 + +/* temporary */ +extern uint32_t squareTbl[512]; +extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; + +/* VP3 DSP functions */ +void ff_vp3_idct_c(DCTELEM *block/* align 16*/); +void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); +void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + +/* 1/2^n downscaling functions from imgconvert.c */ +void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink22(uint8_t 
*dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); + +/* minimum alignment rules ;) +if u notice errors in the align stuff, need more alignment for some asm code for some cpu +or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... + +!warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible) +i (michael) didnt check them, these are just the alignents which i think could be reached easily ... + +!future video codecs might need functions with less strict alignment +*/ + +/* +void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size); +void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); +void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); +void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size); +void clear_blocks_c(DCTELEM *blocks); +*/ + +/* add and put pixel (decoding) */ +// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 +//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4 +typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h); +typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); +typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); +typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, 
int h, int x, int y); +typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset); + +#define DEF_OLD_QPEL(name)\ +void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ +void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ +void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); + +DEF_OLD_QPEL(qpel16_mc11_old_c) +DEF_OLD_QPEL(qpel16_mc31_old_c) +DEF_OLD_QPEL(qpel16_mc12_old_c) +DEF_OLD_QPEL(qpel16_mc32_old_c) +DEF_OLD_QPEL(qpel16_mc13_old_c) +DEF_OLD_QPEL(qpel16_mc33_old_c) +DEF_OLD_QPEL(qpel8_mc11_old_c) +DEF_OLD_QPEL(qpel8_mc31_old_c) +DEF_OLD_QPEL(qpel8_mc12_old_c) +DEF_OLD_QPEL(qpel8_mc32_old_c) +DEF_OLD_QPEL(qpel8_mc13_old_c) +DEF_OLD_QPEL(qpel8_mc33_old_c) + +#define CALL_2X_PIXELS(a, b, n)\ +static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ + b(block , pixels , line_size, h);\ + b(block+n, pixels+n, line_size, h);\ +} + +/* motion estimation */ +// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2 +// allthough currently h<4 is not used as functions with width <8 are not used and neither implemented +typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; + + +// for snow slices +typedef struct slice_buffer_s slice_buffer; + +/** + * DSPContext. 
+ */ +typedef struct DSPContext { + /* pixel ops : interface with DCT */ + void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); + void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); + void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); + void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); + void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); + void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size); + void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size); + /** + * translational global motion compensation. + */ + void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); + /** + * global motion compensation. + */ + void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); + void (*clear_blocks)(DCTELEM *blocks/*align 16*/); + int (*pix_sum)(uint8_t * pix, int line_size); + int (*pix_norm1)(uint8_t * pix, int line_size); +// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4 + + me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */ + me_cmp_func sse[5]; + me_cmp_func hadamard8_diff[5]; + me_cmp_func dct_sad[5]; + me_cmp_func quant_psnr[5]; + me_cmp_func bit[5]; + me_cmp_func rd[5]; + me_cmp_func vsad[5]; + me_cmp_func vsse[5]; + me_cmp_func nsse[5]; + me_cmp_func w53[5]; + me_cmp_func w97[5]; + me_cmp_func dct_max[5]; + me_cmp_func dct264_sad[5]; + + me_cmp_func me_pre_cmp[5]; + me_cmp_func me_cmp[5]; + me_cmp_func me_sub_cmp[5]; + me_cmp_func mb_cmp[5]; + me_cmp_func ildct_cmp[5]; //only width 16 used + me_cmp_func frame_skip_cmp[5]; //only width 8 used + + /** + * Halfpel 
+ motion compensation with rounding (a+b+1)>>1. + * This is an array[4][4] of motion compensation functions for 4 + * horizontal blocksizes (16,8,4,2) and the 4 halfpel positions
+ * *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func put_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with rounding (a+b+1)>>1. + * This is an array[4][4] of motion compensation functions for 4 + * horizontal blocksizes (16,8,4,2) and the 4 halfpel positions
+ * *pixels_tab[ 0->16xH 1->8xH 2->4xH 3->2xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination into which the result is averaged (a+b+1)>>1 + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func avg_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with no rounding (a+b)>>1. + * This is an array[4][4] of motion compensation functions for 2 + * horizontal blocksizes (8,16) and the 4 halfpel positions
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func put_no_rnd_pixels_tab[4][4]; + + /** + * Halfpel motion compensation with no rounding (a+b)>>1. + * This is an array[4][4] of motion compensation functions for 2 + * horizontal blocksizes (8,16) and the 4 halfpel positions
+ * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * @param block destination into which the result is averaged (a+b)>>1 + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + op_pixels_func avg_no_rnd_pixels_tab[4][4]; + + void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); + + /** + * Thirdpel motion compensation with rounding (a+b+1)>>1. + * This is an array[11] of motion compensation functions for the 9 thirdpel positions
+ * *pixels_tab[ xthirdpel + 4*ythirdpel ] + * @param block destination where the result is stored + * @param pixels source + * @param line_size number of bytes in a horizontal line of block + * @param h height + */ + tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width? + tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width? + + qpel_mc_func put_qpel_pixels_tab[2][16]; + qpel_mc_func avg_qpel_pixels_tab[2][16]; + qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; + qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; + qpel_mc_func put_mspel_pixels_tab[8]; + + /** + * h264 Chram MC + */ + h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; + h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; + + qpel_mc_func put_h264_qpel_pixels_tab[4][16]; + qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; + + h264_weight_func weight_h264_pixels_tab[10]; + h264_biweight_func biweight_h264_pixels_tab[10]; + + me_cmp_func pix_abs[2][4]; + + /* huffyuv specific */ + void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); + void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); + /** + * subtract huffyuv's variant of median prediction + * note, this might read from src1[-1], src2[-1] + */ + void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); + void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); + + void (*h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); + void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); + void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, 
int stride, int alpha, int beta); + + void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); + void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); + + void (*h261_loop_filter)(uint8_t *src, int stride); + + /* (I)DCT */ + void (*fdct)(DCTELEM *block/* align 16*/); + void (*fdct248)(DCTELEM *block/* align 16*/); + + /* IDCT really*/ + void (*idct)(DCTELEM *block/* align 16*/); + + /** + * block -> idct -> clip to unsigned 8 bit -> dest. + * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...) + * @param line_size size in bytes of a horizotal line of dest + */ + void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + + /** + * block -> idct -> add dest -> clip to unsigned 8 bit -> dest. + * @param line_size size in bytes of a horizotal line of dest + */ + void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + + /** + * idct input permutation. + * several optimized IDCTs need a permutated input (relative to the normal order of the reference + * IDCT) + * this permutation must be performed before the idct_put/add, note, normally this can be merged + * with the zigzag/alternate scan
+ * an example to avoid confusion: + * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...) + * - (x -> referece dct -> reference idct -> x) + * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x) + * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...) + */ + uint8_t idct_permutation[64]; + int idct_permutation_type; +#define FF_NO_IDCT_PERM 1 +#define FF_LIBMPEG2_IDCT_PERM 2 +#define FF_SIMPLE_IDCT_PERM 3 +#define FF_TRANSPOSE_IDCT_PERM 4 +#define FF_PARTTRANS_IDCT_PERM 5 + + int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale); + void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); +#define BASIS_SHIFT 16 +#define RECON_SHIFT 6 + + void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + + /* snow wavelet */ + void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); + void (*horizontal_compose97i)(DWTELEM *b, int width); + void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); + + void (*prefetch)(void *mem, int stride, int h); + + void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +} DSPContext; + +void dsputil_static_init(void); +void dsputil_init(DSPContext* p, AVCodecContext *avctx); + +/** + * permute block according to permuatation. 
+ * @param last last non zero element in scantable order + */ +void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); + +void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); + +#define BYTE_VEC32(c) ((c)*0x01010101UL) + +static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) +{ + return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); +} + +static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) +{ + return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); +} + +static inline int get_penalty_factor(int lambda, int lambda2, int type){ + switch(type&0xFF){ + default: + case FF_CMP_SAD: + return lambda>>FF_LAMBDA_SHIFT; + case FF_CMP_DCT: + return (3*lambda)>>(FF_LAMBDA_SHIFT+1); + case FF_CMP_W53: + return (4*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_W97: + return (2*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_SATD: + case FF_CMP_DCT264: + return (2*lambda)>>FF_LAMBDA_SHIFT; + case FF_CMP_RD: + case FF_CMP_PSNR: + case FF_CMP_SSE: + case FF_CMP_NSSE: + return lambda2>>FF_LAMBDA_SHIFT; + case FF_CMP_BIT: + return 1; + } +} + +/** + * Empty mmx state. + * this must be called between any dsp function and float/double code. 
+ * for example sin(); dsp->idct_put(); emms_c(); cos() + */ +#define emms_c() + +/* should be defined by architectures supporting + one or more MultiMedia extension */ +int mm_support(void); + +#ifdef __GNUC__ + #define DECLARE_ALIGNED_16(t,v) t v __attribute__ ((aligned (16))) +#else + #define DECLARE_ALIGNED_16(t,v) __declspec(align(16)) t v +#endif + +#if defined(HAVE_MMX) + +#undef emms_c + +#define MM_MMX 0x0001 /* standard MMX */ +#define MM_3DNOW 0x0004 /* AMD 3DNOW */ +#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ +#define MM_SSE 0x0008 /* SSE functions */ +#define MM_SSE2 0x0010 /* PIV SSE2 functions */ +#define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ + +extern int mm_flags; + +void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); +void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); +void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); + +static inline void emms(void) +{ + __asm __volatile ("emms;":::"memory"); +} + + +#define emms_c() \ +{\ + if (mm_flags & MM_MMX)\ + emms();\ +} + +#ifdef __GNUC__ + #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#else + #define DECLARE_ALIGNED_8(t,v) __declspec(align(8)) t v +#endif + +#define STRIDE_ALIGN 8 + +void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); +void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); + +#elif defined(ARCH_ARMV4L) + +/* This is to use 4 bytes read to the IDCT pointers for some 'zero' + line optimizations */ +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (4))) +#define STRIDE_ALIGN 4 + +#define MM_IWMMXT 0x0100 /* XScale IWMMXT */ + +extern int mm_flags; + +void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); + +#elif defined(HAVE_MLIB) + +/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 + +void 
dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); + +#elif defined(ARCH_SPARC) + +/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 +void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); + +#elif defined(ARCH_ALPHA) + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 + +void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); + +#elif defined(ARCH_POWERPC) + +#define MM_ALTIVEC 0x0001 /* standard AltiVec */ + +extern int mm_flags; + +#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN) +#define pixel altivec_pixel +#include +#undef pixel +#endif + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (16))) +#define STRIDE_ALIGN 16 + +void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); + +#elif defined(HAVE_MMI) + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (16))) +#define STRIDE_ALIGN 16 + +void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); + +#elif defined(ARCH_SH4) + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 + +void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); + +#else + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 + +#endif + +#ifdef __GNUC__ + +struct unaligned_64 { uint64_t l; } __attribute__((packed)); +struct unaligned_32 { uint32_t l; } __attribute__((packed)); +struct unaligned_16 { uint16_t l; } __attribute__((packed)); + +#define LD16(a) (((const struct unaligned_16 *) (a))->l) +#define LD32(a) (((const struct unaligned_32 *) (a))->l) +#define LD64(a) (((const struct unaligned_64 *) (a))->l) + +#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) +#define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) + +#else /* __GNUC__ */ + +#define LD16(a) (*((uint16_t*)(a))) +#define LD32(a) (*((uint32_t*)(a))) +#define LD64(a) (*((uint64_t*)(a))) + +#define 
ST16(a, b) *((uint16_t*)(a)) = (b) +#define ST32(a, b) *((uint32_t*)(a)) = (b) + +#endif /* !__GNUC__ */ + +/* PSNR */ +void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], + int orig_linesize[3], int coded_linesize, + AVCodecContext *avctx); + +/* FFT computation */ + +/* NOTE: soon integer code will be added, so you must use the + FFTSample type */ +typedef float FFTSample; + +typedef struct FFTComplex { + FFTSample re, im; +} FFTComplex; + +typedef struct FFTContext { + int nbits; + int inverse; + uint16_t *revtab; + FFTComplex *exptab; + FFTComplex *exptab1; /* only used by SSE code */ + void (*fft_calc)(struct FFTContext *s, FFTComplex *z); +} FFTContext; + +int ff_fft_init(FFTContext *s, int nbits, int inverse); +void ff_fft_permute(FFTContext *s, FFTComplex *z); +void ff_fft_calc_c(FFTContext *s, FFTComplex *z); +void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); +void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); + +static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) +{ + s->fft_calc(s, z); +} +void ff_fft_end(FFTContext *s); + +/* MDCT computation */ + +typedef struct MDCTContext { + int n; /* size of MDCT (i.e. 
number of input data * 2) */ + int nbits; /* n = 2^nbits */ + /* pre/post rotation tables */ + FFTSample *tcos; + FFTSample *tsin; + FFTContext fft; +} MDCTContext; + +int ff_mdct_init(MDCTContext *s, int nbits, int inverse); +void ff_imdct_calc(MDCTContext *s, FFTSample *output, + const FFTSample *input, FFTSample *tmp); +void ff_mdct_calc(MDCTContext *s, FFTSample *out, + const FFTSample *input, FFTSample *tmp); +void ff_mdct_end(MDCTContext *s); + +#define WARPER8_16(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + return name8(s, dst , src , stride, h)\ + +name8(s, dst+8 , src+8 , stride, h);\ +} + +#define WARPER8_16_SQ(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\ + int score=0;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + if(h==16){\ + dst += 8*stride;\ + src += 8*stride;\ + score +=name8(s, dst , src , stride, 8);\ + score +=name8(s, dst+8 , src+8 , stride, 8);\ + }\ + return score;\ +} + +#endif diff --git a/mpeg4/src/libavcodec/dtsdec.c b/mpeg4/src/libavcodec/dtsdec.c new file mode 100644 index 0000000000000000000000000000000000000000..dd3268217ed20437ab0190e868444d73407b8c00 --- /dev/null +++ b/mpeg4/src/libavcodec/dtsdec.c @@ -0,0 +1,320 @@ +/* + * dtsdec.c : free DTS Coherent Acoustics stream decoder. + * Copyright (C) 2004 Benjamin Zores + * + * This file is part of libavcodec. + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef HAVE_AV_CONFIG_H +#undef HAVE_AV_CONFIG_H +#endif + +#include "avcodec.h" +#include + +#include +#include + +#ifdef HAVE_MALLOC_H +#include +#endif + +#define BUFFER_SIZE 18726 +#define HEADER_SIZE 14 + +#ifdef LIBDTS_FIXED +#define CONVERT_LEVEL (1 << 26) +#define CONVERT_BIAS 0 +#else +#define CONVERT_LEVEL 1 +#define CONVERT_BIAS 384 +#endif + +static inline +int16_t convert (int32_t i) +{ +#ifdef LIBDTS_FIXED + i >>= 15; +#else + i -= 0x43c00000; +#endif + return (i > 32767) ? 32767 : ((i < -32768) ? -32768 : i); +} + +void +convert2s16_2 (sample_t * _f, int16_t * s16) +{ + int i; + int32_t * f = (int32_t *) _f; + + for (i = 0; i < 256; i++) + { + s16[2*i] = convert (f[i]); + s16[2*i+1] = convert (f[i+256]); + } +} + +void +convert2s16_4 (sample_t * _f, int16_t * s16) +{ + int i; + int32_t * f = (int32_t *) _f; + + for (i = 0; i < 256; i++) + { + s16[4*i] = convert (f[i]); + s16[4*i+1] = convert (f[i+256]); + s16[4*i+2] = convert (f[i+512]); + s16[4*i+3] = convert (f[i+768]); + } +} + +void +convert2s16_5 (sample_t * _f, int16_t * s16) +{ + int i; + int32_t * f = (int32_t *) _f; + + for (i = 0; i < 256; i++) + { + s16[5*i] = convert (f[i]); + s16[5*i+1] = convert (f[i+256]); + s16[5*i+2] = convert (f[i+512]); + s16[5*i+3] = convert (f[i+768]); + s16[5*i+4] = convert (f[i+1024]); + } +} + +static void +convert2s16_multi (sample_t * _f, int16_t * s16, int flags) +{ + int i; + int32_t * f = (int32_t *) _f; + + switch (flags) + { + case DTS_MONO: + for (i = 0; i < 256; i++) + { + s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert (f[i]); + } + break; + case DTS_CHANNEL: + case DTS_STEREO: + case DTS_DOLBY: + convert2s16_2 (_f, s16); + break; + case DTS_3F: + for (i = 0; i < 
256; i++) + { + s16[5*i] = convert (f[i]); + s16[5*i+1] = convert (f[i+512]); + s16[5*i+2] = s16[5*i+3] = 0; + s16[5*i+4] = convert (f[i+256]); + } + break; + case DTS_2F2R: + convert2s16_4 (_f, s16); + break; + case DTS_3F2R: + convert2s16_5 (_f, s16); + break; + case DTS_MONO | DTS_LFE: + for (i = 0; i < 256; i++) + { + s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert (f[i+256]); + s16[6*i+5] = convert (f[i]); + } + break; + case DTS_CHANNEL | DTS_LFE: + case DTS_STEREO | DTS_LFE: + case DTS_DOLBY | DTS_LFE: + for (i = 0; i < 256; i++) + { + s16[6*i] = convert (f[i+256]); + s16[6*i+1] = convert (f[i+512]); + s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; + s16[6*i+5] = convert (f[i]); + } + break; + case DTS_3F | DTS_LFE: + for (i = 0; i < 256; i++) + { + s16[6*i] = convert (f[i+256]); + s16[6*i+1] = convert (f[i+768]); + s16[6*i+2] = s16[6*i+3] = 0; + s16[6*i+4] = convert (f[i+512]); + s16[6*i+5] = convert (f[i]); + } + break; + case DTS_2F2R | DTS_LFE: + for (i = 0; i < 256; i++) + { + s16[6*i] = convert (f[i+256]); + s16[6*i+1] = convert (f[i+512]); + s16[6*i+2] = convert (f[i+768]); + s16[6*i+3] = convert (f[i+1024]); + s16[6*i+4] = 0; + s16[6*i+5] = convert (f[i]); + } + break; + case DTS_3F2R | DTS_LFE: + for (i = 0; i < 256; i++) + { + s16[6*i] = convert (f[i+256]); + s16[6*i+1] = convert (f[i+768]); + s16[6*i+2] = convert (f[i+1024]); + s16[6*i+3] = convert (f[i+1280]); + s16[6*i+4] = convert (f[i+512]); + s16[6*i+5] = convert (f[i]); + } + break; + } +} + +static int +channels_multi (int flags) +{ + if (flags & DTS_LFE) + return 6; + else if (flags & 1) /* center channel */ + return 5; + else if ((flags & DTS_CHANNEL_MASK) == DTS_2F2R) + return 4; + else + return 2; +} + +static int +dts_decode_frame (AVCodecContext *avctx, void *data, int *data_size, + uint8_t *buff, int buff_size) +{ + uint8_t * start = buff; + uint8_t * end = buff + buff_size; + static uint8_t buf[BUFFER_SIZE]; + static uint8_t * bufptr = buf; + static uint8_t * 
bufpos = buf + HEADER_SIZE; + + static int sample_rate; + static int frame_length; + static int flags; + int bit_rate; + int len; + dts_state_t *state = avctx->priv_data; + + *data_size = 0; + + while (1) + { + len = end - start; + if (!len) + break; + if (len > bufpos - bufptr) + len = bufpos - bufptr; + memcpy (bufptr, start, len); + bufptr += len; + start += len; + if (bufptr != bufpos) + return start - buff; + if (bufpos != buf + HEADER_SIZE) + break; + + { + int length; + + length = dts_syncinfo (state, buf, &flags, &sample_rate, + &bit_rate, &frame_length); + if (!length) + { + av_log (NULL, AV_LOG_INFO, "skip\n"); + for (bufptr = buf; bufptr < buf + HEADER_SIZE-1; bufptr++) + bufptr[0] = bufptr[1]; + continue; + } + bufpos = buf + length; + } + } + + { + level_t level; + sample_t bias; + int i; + + flags = 2; /* ???????????? */ + level = CONVERT_LEVEL; + bias = CONVERT_BIAS; + + flags |= DTS_ADJUST_LEVEL; + if (dts_frame (state, buf, &flags, &level, bias)) + goto error; + avctx->sample_rate = sample_rate; + avctx->channels = channels_multi (flags); + avctx->bit_rate = bit_rate; + for (i = 0; i < dts_blocks_num (state); i++) + { + if (dts_block (state)) + goto error; + { + int chans; + chans = channels_multi (flags); + convert2s16_multi (dts_samples (state), data, + flags & (DTS_CHANNEL_MASK | DTS_LFE)); + + data += 256 * sizeof (int16_t) * chans; + *data_size += 256 * sizeof (int16_t) * chans; + } + } + bufptr = buf; + bufpos = buf + HEADER_SIZE; + return start-buff; + error: + av_log (NULL, AV_LOG_ERROR, "error\n"); + bufptr = buf; + bufpos = buf + HEADER_SIZE; + } + + return start-buff; +} + +static int +dts_decode_init (AVCodecContext *avctx) +{ + avctx->priv_data = dts_init (0); + if (avctx->priv_data == NULL) + return -1; + + return 0; +} + +static int +dts_decode_end (AVCodecContext *s) +{ + return 0; +} + +AVCodec dts_decoder = { + "dts", + CODEC_TYPE_AUDIO, + CODEC_ID_DTS, + sizeof (dts_state_t *), + dts_decode_init, + NULL, + dts_decode_end, + 
dts_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/dv.c b/mpeg4/src/libavcodec/dv.c new file mode 100644 index 0000000000000000000000000000000000000000..c39d70c54222720d6bb38361073c63f448316a98 --- /dev/null +++ b/mpeg4/src/libavcodec/dv.c @@ -0,0 +1,1147 @@ +/* + * DV decoder + * Copyright (c) 2002 Fabrice Bellard. + * Copyright (c) 2004 Roman Shaposhnik. + * + * DV encoder + * Copyright (c) 2003 Roman Shaposhnik. + * + * 50 Mbps (DVCPRO50) support + * Copyright (c) 2006 Daniel Maas + * + * Many thanks to Dan Dennedy for providing wealth + * of DV technical info. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file dv.c + * DV codec. 
+ */ +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" +#include "simple_idct.h" +#include "dvdata.h" + +//#undef NDEBUG +//#include + +typedef struct DVVideoContext { + const DVprofile* sys; + AVFrame picture; + AVCodecContext *avctx; + uint8_t *buf; + + uint8_t dv_zigzag[2][64]; + uint8_t dv_idct_shift[2][2][22][64]; + + void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); + void (*fdct[2])(DCTELEM *block); + void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); +} DVVideoContext; + +/* MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext */ +/* one element is needed for each video segment in a DV frame */ +/* at most there are 2 DIF channels * 12 DIF sequences * 27 video segments (PAL 50Mbps) */ +#define DV_ANCHOR_SIZE (2*12*27) + +static void* dv_anchor[DV_ANCHOR_SIZE]; + +#define TEX_VLC_BITS 9 + +#ifdef DV_CODEC_TINY_TARGET +#define DV_VLC_MAP_RUN_SIZE 15 +#define DV_VLC_MAP_LEV_SIZE 23 +#else +#define DV_VLC_MAP_RUN_SIZE 64 +#define DV_VLC_MAP_LEV_SIZE 512 //FIXME sign was removed so this should be /2 but needs check +#endif + +/* XXX: also include quantization */ +static RL_VLC_ELEM *dv_rl_vlc; +/* VLC encoding lookup table */ +static struct dv_vlc_pair { + uint32_t vlc; + uint8_t size; +} (*dv_vlc_map)[DV_VLC_MAP_LEV_SIZE] = NULL; + +static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm) +{ + int i, q, j; + + /* NOTE: max left shift is 6 */ + for(q = 0; q < 22; q++) { + /* 88DCT */ + for(i = 1; i < 64; i++) { + /* 88 table */ + j = perm[i]; + s->dv_idct_shift[0][0][q][j] = + dv_quant_shifts[q][dv_88_areas[i]] + 1; + s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; + } + + /* 248DCT */ + for(i = 1; i < 64; i++) { + /* 248 table */ + s->dv_idct_shift[0][1][q][i] = + dv_quant_shifts[q][dv_248_areas[i]] + 1; + s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; + } + } +} + +static int dvvideo_init(AVCodecContext *avctx) +{ + 
DVVideoContext *s = avctx->priv_data; + DSPContext dsp; + static int done=0; + int i, j; + + if (!done) { + VLC dv_vlc; + uint16_t new_dv_vlc_bits[NB_DV_VLC*2]; + uint8_t new_dv_vlc_len[NB_DV_VLC*2]; + uint8_t new_dv_vlc_run[NB_DV_VLC*2]; + int16_t new_dv_vlc_level[NB_DV_VLC*2]; + + done = 1; + + dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); + if (!dv_vlc_map) + return -ENOMEM; + + /* dv_anchor lets each thread know its Id */ + for (i=0; i= DV_VLC_MAP_RUN_SIZE) + continue; +#ifdef DV_CODEC_TINY_TARGET + if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE) + continue; +#endif + + if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) + continue; + + dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << + (!!dv_vlc_level[i]); + dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + + (!!dv_vlc_level[i]); + } + for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { +#ifdef DV_CODEC_TINY_TARGET + for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { + if (dv_vlc_map[i][j].size == 0) { + dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | + (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); + dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + + dv_vlc_map[0][j].size; + } + } +#else + for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { + if (dv_vlc_map[i][j].size == 0) { + dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | + (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); + dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + + dv_vlc_map[0][j].size; + } + dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = + dv_vlc_map[i][j].vlc | 1; + dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = + dv_vlc_map[i][j].size; + } +#endif + } + } + + /* Generic DSP setup */ + dsputil_init(&dsp, avctx); + s->get_pixels = dsp.get_pixels; + + /* 88DCT setup */ + s->fdct[0] = dsp.fdct; + s->idct_put[0] = dsp.idct_put; + for (i=0; i<64; i++) + s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]]; + + /* 248DCT setup */ + s->fdct[1] = dsp.fdct248; + 
s->idct_put[1] = simple_idct248_put; // FIXME: need to add it to DSP + if(avctx->lowres){ + for (i=0; i<64; i++){ + int j= ff_zigzag248_direct[i]; + s->dv_zigzag[1][i] = dsp.idct_permutation[(j&7) + (j&8)*4 + (j&48)/2]; + } + }else + memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64); + + /* XXX: do it only for constant case */ + dv_build_unquantize_tables(s, dsp.idct_permutation); + + avctx->coded_frame = &s->picture; + s->avctx= avctx; + + return 0; +} + +// #define VLC_DEBUG +// #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__) + +typedef struct BlockInfo { + const uint8_t *shift_table; + const uint8_t *scan_table; + const int *iweight_table; + uint8_t pos; /* position in block */ + uint8_t dct_mode; + uint8_t partial_bit_count; + uint16_t partial_bit_buffer; + int shift_offset; +} BlockInfo; + +/* block size in bits */ +static const uint16_t block_sizes[6] = { + 112, 112, 112, 112, 80, 80 +}; +/* bit budget for AC only in 5 MBs */ +static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5; +/* see dv_88_areas and dv_248_areas for details */ +static const int mb_area_start[5] = { 1, 6, 21, 43, 64 }; + +#ifndef ALT_BITSTREAM_READER +#warning only works with ALT_BITSTREAM_READER +static int re_index; //Hack to make it compile +#endif + +static inline int get_bits_left(GetBitContext *s) +{ + return s->size_in_bits - get_bits_count(s); +} + +static inline int get_bits_size(GetBitContext *s) +{ + return s->size_in_bits; +} + +static inline int put_bits_left(PutBitContext* s) +{ + return (s->buf_end - s->buf) * 8 - put_bits_count(s); +} + +/* decode ac coefs */ +static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) +{ + int last_index = get_bits_size(gb); + const uint8_t *scan_table = mb->scan_table; + const uint8_t *shift_table = mb->shift_table; + const int *iweight_table = mb->iweight_table; + int pos = mb->pos; + int partial_bit_count = mb->partial_bit_count; + int level, pos1, run, vlc_len, index; + + OPEN_READER(re, gb); + 
UPDATE_CACHE(re, gb); + + /* if we must parse a partial vlc, we do it here */ + if (partial_bit_count > 0) { + re_cache = ((unsigned)re_cache >> partial_bit_count) | + (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); + re_index -= partial_bit_count; + mb->partial_bit_count = 0; + } + + /* get the AC coefficients until last_index is reached */ + for(;;) { +#ifdef VLC_DEBUG + printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index); +#endif + /* our own optimized GET_RL_VLC */ + index = NEG_USR32(re_cache, TEX_VLC_BITS); + vlc_len = dv_rl_vlc[index].len; + if (vlc_len < 0) { + index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; + vlc_len = TEX_VLC_BITS - vlc_len; + } + level = dv_rl_vlc[index].level; + run = dv_rl_vlc[index].run; + + /* gotta check if we're still within gb boundaries */ + if (re_index + vlc_len > last_index) { + /* should be < 16 bits otherwise a codeword could have been parsed */ + mb->partial_bit_count = last_index - re_index; + mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); + re_index = last_index; + break; + } + re_index += vlc_len; + +#ifdef VLC_DEBUG + printf("run=%d level=%d\n", run, level); +#endif + pos += run; + if (pos >= 64) + break; + + pos1 = scan_table[pos]; + level <<= shift_table[pos1]; + + /* unweigh, round, and shift down */ + level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits; + + block[pos1] = level; + + UPDATE_CACHE(re, gb); + } + CLOSE_READER(re, gb); + mb->pos = pos; +} + +static inline void bit_copy(PutBitContext *pb, GetBitContext *gb) +{ + int bits_left = get_bits_left(gb); + while (bits_left >= MIN_CACHE_BITS) { + put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS)); + bits_left -= MIN_CACHE_BITS; + } + if (bits_left > 0) { + put_bits(pb, bits_left, get_bits(gb, bits_left)); + } +} + +/* mb_x and mb_y are in units of 8 pixels */ +static inline void dv_decode_video_segment(DVVideoContext 
*s, + uint8_t *buf_ptr1, + const uint16_t *mb_pos_ptr) +{ + int quant, dc, dct_mode, class1, j; + int mb_index, mb_x, mb_y, v, last_index; + DCTELEM *block, *block1; + int c_offset; + uint8_t *y_ptr; + void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block); + uint8_t *buf_ptr; + PutBitContext pb, vs_pb; + GetBitContext gb; + BlockInfo mb_data[5 * 6], *mb, *mb1; + DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]); + DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */ + DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */ + const int log2_blocksize= 3-s->avctx->lowres; + + assert((((int)mb_bit_buffer)&7)==0); + assert((((int)vs_bit_buffer)&7)==0); + + memset(sblock, 0, sizeof(sblock)); + + /* pass 1 : read DC and AC coefficients in blocks */ + buf_ptr = buf_ptr1; + block1 = &sblock[0][0]; + mb1 = mb_data; + init_put_bits(&vs_pb, vs_bit_buffer, 5 * 80); + for(mb_index = 0; mb_index < 5; mb_index++, mb1 += 6, block1 += 6 * 64) { + /* skip header */ + quant = buf_ptr[3] & 0x0f; + buf_ptr += 4; + init_put_bits(&pb, mb_bit_buffer, 80); + mb = mb1; + block = block1; + for(j = 0;j < 6; j++) { + last_index = block_sizes[j]; + init_get_bits(&gb, buf_ptr, last_index); + + /* get the dc */ + dc = get_sbits(&gb, 9); + dct_mode = get_bits1(&gb); + mb->dct_mode = dct_mode; + mb->scan_table = s->dv_zigzag[dct_mode]; + mb->iweight_table = dct_mode ? 
dv_iweight_248 : dv_iweight_88; + class1 = get_bits(&gb, 2); + mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] + [quant + dv_quant_offset[class1]]; + dc = dc << 2; + /* convert to unsigned because 128 is not added in the + standard IDCT */ + dc += 1024; + block[0] = dc; + buf_ptr += last_index >> 3; + mb->pos = 0; + mb->partial_bit_count = 0; + +#ifdef VLC_DEBUG + printf("MB block: %d, %d ", mb_index, j); +#endif + dv_decode_ac(&gb, mb, block); + + /* write the remaining bits in a new buffer only if the + block is finished */ + if (mb->pos >= 64) + bit_copy(&pb, &gb); + + block += 64; + mb++; + } + + /* pass 2 : we can do it just after */ +#ifdef VLC_DEBUG + printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index); +#endif + block = block1; + mb = mb1; + init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb)); + flush_put_bits(&pb); + for(j = 0;j < 6; j++, block += 64, mb++) { + if (mb->pos < 64 && get_bits_left(&gb) > 0) { + dv_decode_ac(&gb, mb, block); + /* if still not finished, no need to parse other blocks */ + if (mb->pos < 64) + break; + } + } + /* all blocks are finished, so the extra bytes can be used at + the video segment level */ + if (j >= 6) + bit_copy(&vs_pb, &gb); + } + + /* we need a pass other the whole video segment */ +#ifdef VLC_DEBUG + printf("***pass 3 size=%d\n", put_bits_count(&vs_pb)); +#endif + block = &sblock[0][0]; + mb = mb_data; + init_get_bits(&gb, vs_bit_buffer, put_bits_count(&vs_pb)); + flush_put_bits(&vs_pb); + for(mb_index = 0; mb_index < 5; mb_index++) { + for(j = 0;j < 6; j++) { + if (mb->pos < 64) { +#ifdef VLC_DEBUG + printf("start %d:%d\n", mb_index, j); +#endif + dv_decode_ac(&gb, mb, block); + } + if (mb->pos >= 64 && mb->pos < 127) + av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos); + block += 64; + mb++; + } + } + + /* compute idct and place blocks */ + block = &sblock[0][0]; + mb = mb_data; + for(mb_index = 0; mb_index < 5; mb_index++) { + v = *mb_pos_ptr++; + mb_x = v & 
0xff; + mb_y = v >> 8; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<picture.linesize[1] + (mb_x >> 2))<picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<sys->pix_fmt == PIX_FMT_YUV411P) + c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<> 1) * s->picture.linesize[1] + (mb_x >> 1))<idct_put[mb->dct_mode && log2_blocksize==3]; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + idct_put(y_ptr + ((j >> 1)<picture.linesize[0], block); + } else if(j > 3) { + /* Cr Cb */ + idct_put(s->picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); + } + /* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */ + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(y_ptr + (j<picture.linesize[0], block); + } else { + idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<picture.linesize[0], block); + } + } else { + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { + uint64_t aligned_pixels[64/8]; + uint8_t *pixels= (uint8_t*)aligned_pixels; + uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; + int x, y, linesize; + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(pixels, 8, block); + linesize = s->picture.linesize[6 - j]; + c_ptr = s->picture.data[6 - j] + c_offset; + ptr = pixels; + for(y = 0;y < (1<picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); + } + } + } + block += 64; + mb++; + } + } +} + +#ifdef DV_CODEC_TINY_TARGET +/* Converts run and level (where level != 0) pair into vlc, returning bit size */ +static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc) +{ + int size; + if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { + *vlc = dv_vlc_map[run][level].vlc | sign; + size = dv_vlc_map[run][level].size; + } + else 
{ + if (level < DV_VLC_MAP_LEV_SIZE) { + *vlc = dv_vlc_map[0][level].vlc | sign; + size = dv_vlc_map[0][level].size; + } else { + *vlc = 0xfe00 | (level << 1) | sign; + size = 16; + } + if (run) { + *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : + (0x1f80 | (run - 1))) << size; + size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; + } + } + + return size; +} + +static always_inline int dv_rl2vlc_size(int run, int level) +{ + int size; + + if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { + size = dv_vlc_map[run][level].size; + } + else { + size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; + if (run) { + size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; + } + } + return size; +} +#else +static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc) +{ + *vlc = dv_vlc_map[run][l].vlc | sign; + return dv_vlc_map[run][l].size; +} + +static always_inline int dv_rl2vlc_size(int run, int l) +{ + return dv_vlc_map[run][l].size; +} +#endif + +typedef struct EncBlockInfo { + int area_q[4]; + int bit_size[4]; + int prev[5]; + int cur_ac; + int cno; + int dct_mode; + DCTELEM mb[64]; + uint8_t next[64]; + uint8_t sign[64]; + uint8_t partial_bit_count; + uint32_t partial_bit_buffer; /* we can't use uint16_t here */ +} EncBlockInfo; + +static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, + PutBitContext* pb_end) +{ + int prev; + int bits_left; + PutBitContext* pb = pb_pool; + int size = bi->partial_bit_count; + uint32_t vlc = bi->partial_bit_buffer; + + bi->partial_bit_count = bi->partial_bit_buffer = 0; + for(;;){ + /* Find suitable storage space */ + for (; size > (bits_left = put_bits_left(pb)); pb++) { + if (bits_left) { + size -= bits_left; + put_bits(pb, bits_left, vlc >> size); + vlc = vlc & ((1<= pb_end) { + bi->partial_bit_count = size; + bi->partial_bit_buffer = vlc; + return pb; + } + } + + /* Store VLC */ + put_bits(pb, size, vlc); + + if(bi->cur_ac>=64) + break; + + /* 
Construct the next VLC */ + prev= bi->cur_ac; + bi->cur_ac = bi->next[prev]; + if(bi->cur_ac < 64){ + size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc); + } else { + size = 4; vlc = 6; /* End Of Block stamp */ + } + } + return pb; +} + +static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, + const uint8_t* zigzag_scan, const int *weight, int bias) +{ + int i, area; + /* We offer two different methods for class number assignment: the + method suggested in SMPTE 314M Table 22, and an improved + method. The SMPTE method is very conservative; it assigns class + 3 (i.e. severe quantization) to any block where the largest AC + component is greater than 36. ffmpeg's DV encoder tracks AC bit + consumption precisely, so there is no need to bias most blocks + towards strongly lossy compression. Instead, we assign class 2 + to most blocks, and use class 3 only when strictly necessary + (for blocks whose largest AC component exceeds 255). 
*/ + +#if 0 /* SMPTE spec method */ + static const int classes[] = {12, 24, 36, 0xffff}; +#else /* improved ffmpeg method */ + static const int classes[] = {-1, -1, 255, 0xffff}; +#endif + int max=classes[0]; + int prev=0; + + bi->mb[0] = blk[0]; + + for (area = 0; area < 4; area++) { + bi->prev[area] = prev; + bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :) + for (i=mb_area_start[area]; i 30U) { + bi->sign[i] = (level>>31)&1; + /* weigh it and and shift down into range, adding for rounding */ + /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT + AND the 2x doubling of the weights */ + level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4); + bi->mb[i] = level; + if(level>max) max= level; + bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level); + bi->next[prev]= i; + prev= i; + } + } + } + bi->next[prev]= i; + for(bi->cno = 0; max > classes[bi->cno]; bi->cno++); + + bi->cno += bias; + + if (bi->cno >= 3) { + bi->cno = 3; + prev=0; + i= bi->next[prev]; + for (area = 0; area < 4; area++) { + bi->prev[area] = prev; + bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :) + for (; inext[i]) { + bi->mb[i] >>=1; + + if (bi->mb[i]) { + bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]); + bi->next[prev]= i; + prev= i; + } + } + } + bi->next[prev]= i; + } +} + +//FIXME replace this by dsputil +#define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7)) +static always_inline int dv_guess_dct_mode(DCTELEM *blk) { + DCTELEM *s; + int score88 = 0; + int score248 = 0; + int i; + + /* Compute 8-8 score (small values give a better chance for 8-8 DCT) */ + s = blk; + for(i=0; i<7; i++) { + score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + + SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); + s += 8; + } + /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */ + s = blk; + for(i=0; i<6; i++) { + score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) + + SC(4, 20) + SC(5,21) + SC(6, 22) + 
SC(7, 23); + s += 8; + } + + return (score88 - score248 > -10); +} + +static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) +{ + int size[5]; + int i, j, k, a, prev, a2; + EncBlockInfo* b; + + size[0] = size[1] = size[2] = size[3] = size[4] = 1<<24; + do { + b = blks; + for (i=0; i<5; i++) { + if (!qnos[i]) + continue; + + qnos[i]--; + size[i] = 0; + for (j=0; j<6; j++, b++) { + for (a=0; a<4; a++) { + if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { + b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) + b->area_q[a]++; + prev= b->prev[a]; + assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]); + for (k= b->next[prev] ; knext[k]) { + b->mb[k] >>= 1; + if (b->mb[k]) { + b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); + prev= k; + } else { + if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){ + for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++) + b->prev[a2] = prev; + assert(a2<4); + assert(b->mb[b->next[k]]); + b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]]) + -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]); + assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k)); + b->prev[a2] = prev; + } + b->next[prev] = b->next[k]; + } + } + b->prev[a+1]= prev; + } + size[i] += b->bit_size[a]; + } + } + if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4]) + return; + } + } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]); + + + for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){ + b = blks; + size[0] = 5*6*4; //EOB + for (j=0; j<6*5; j++, b++) { + prev= b->prev[0]; + for (k= b->next[prev]; k<64; k= b->next[k]) { + if(b->mb[k] < a && b->mb[k] > -a){ + b->next[prev] = b->next[k]; + }else{ + size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); + prev= k; + } + } + } + } +} + +static inline void dv_encode_video_segment(DVVideoContext *s, + uint8_t *dif, + const uint16_t *mb_pos_ptr) +{ + int mb_index, i, j, v; + int mb_x, mb_y, c_offset, linesize; + uint8_t* y_ptr; + 
uint8_t* data; + uint8_t* ptr; + int do_edge_wrap; + DECLARE_ALIGNED_8(DCTELEM, block[64]); + EncBlockInfo enc_blks[5*6]; + PutBitContext pbs[5*6]; + PutBitContext* pb; + EncBlockInfo* enc_blk; + int vs_bit_size = 0; + int qnos[5]; + + assert((((int)block) & 7) == 0); + + enc_blk = &enc_blks[0]; + pb = &pbs[0]; + for(mb_index = 0; mb_index < 5; mb_index++) { + v = *mb_pos_ptr++; + mb_x = v & 0xff; + mb_y = v >> 8; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4); + } else { /* 4:1:1 */ + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); + } + if (s->sys->pix_fmt == PIX_FMT_YUV420P) { + c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); + } else { /* 4:2:2 or 4:1:1 */ + c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)); + } + do_edge_wrap = 0; + qnos[mb_index] = 15; /* No quantization */ + ptr = dif + mb_index*80 + 4; + for(j = 0;j < 6; j++) { + int dummy = 0; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + data = y_ptr + ((j>>1) * 8); + linesize = s->picture.linesize[0]; + } else if (j > 3) { + /* Cr Cb */ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; + } else { + /* j=1 and j=3 are "dummy" blocks, used for AC data only */ + data = 0; + linesize = 0; + dummy = 1; + } + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { /* Four Y blocks */ + /* NOTE: at end of line, the macroblock is handled as 420 */ + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + data = y_ptr + (j * 8); + } else { + data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); + } + linesize = s->picture.linesize[0]; + } else { /* Cr and Cb blocks */ + /* don't ask Fabrice why they inverted Cb and Cr ! 
*/ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) + do_edge_wrap = 1; + } + } + + /* Everything is set up -- now just copy data -> DCT block */ + if (do_edge_wrap) { /* Edge wrap copy: 4x16 -> 8x8 */ + uint8_t* d; + DCTELEM *b = block; + for (i=0;i<8;i++) { + d = data + 8 * linesize; + b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3]; + b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; + data += linesize; + b += 8; + } + } else { /* Simple copy: 8x8 -> 8x8 */ + if (!dummy) + s->get_pixels(block, data, linesize); + } + + if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) + enc_blk->dct_mode = dv_guess_dct_mode(block); + else + enc_blk->dct_mode = 0; + enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; + enc_blk->partial_bit_count = 0; + enc_blk->partial_bit_buffer = 0; + enc_blk->cur_ac = 0; + + if (dummy) { + /* We rely on the fact that encoding all zeros leads to an immediate EOB, + which is precisely what the spec calls for in the "dummy" blocks. */ + memset(block, 0, sizeof(block)); + } else { + s->fdct[enc_blk->dct_mode](block); + } + + dv_set_class_number(block, enc_blk, + enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, + enc_blk->dct_mode ? 
dv_weight_248 : dv_weight_88, + j/4); + + init_put_bits(pb, ptr, block_sizes[j]/8); + put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); + put_bits(pb, 1, enc_blk->dct_mode); + put_bits(pb, 2, enc_blk->cno); + + vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + + enc_blk->bit_size[2] + enc_blk->bit_size[3]; + ++enc_blk; + ++pb; + ptr += block_sizes[j]/8; + } + } + + if (vs_total_ac_bits < vs_bit_size) + dv_guess_qnos(&enc_blks[0], &qnos[0]); + + for (i=0; i<5; i++) { + dif[i*80 + 3] = qnos[i]; + } + + /* First pass over individual cells only */ + for (j=0; j<5*6; j++) + dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]); + + /* Second pass over each MB space */ + for (j=0; j<5*6; j+=6) { + pb= &pbs[j]; + for (i=0; i<6; i++) { + if (enc_blks[i+j].partial_bit_count) + pb=dv_encode_ac(&enc_blks[i+j], pb, &pbs[j+6]); + } + } + + /* Third and final pass over the whole vides segment space */ + pb= &pbs[0]; + for (j=0; j<5*6; j++) { + if (enc_blks[j].partial_bit_count) + pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]); + if (enc_blks[j].partial_bit_count) + av_log(NULL, AV_LOG_ERROR, "ac bitstream overflow\n"); + } + + for (j=0; j<5*6; j++) + flush_put_bits(&pbs[j]); +} + +static int dv_decode_mt(AVCodecContext *avctx, void* sl) +{ + DVVideoContext *s = avctx->priv_data; + int slice = (size_t)sl; + + /* which DIF channel is this? */ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], + &s->sys->video_place[slice*5]); + return 0; +} + +static int dv_encode_mt(AVCodecContext *avctx, void* sl) +{ + DVVideoContext *s = avctx->priv_data; + int slice = (size_t)sl; + + /* which DIF channel is this? 
*/ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], + &s->sys->video_place[slice*5]); + return 0; +} + +/* NOTE: exactly one frame must be given (120000 bytes for NTSC, + 144000 bytes for PAL - or twice those for 50Mbps) */ +static int dvvideo_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + DVVideoContext *s = avctx->priv_data; + + s->sys = dv_frame_profile(buf); + if (!s->sys || buf_size < s->sys->frame_size) + return -1; /* NOTE: we only accept several full frames */ + + if(s->picture.data[0]) + avctx->release_buffer(avctx, &s->picture); + + s->picture.reference = 0; + s->picture.key_frame = 1; + s->picture.pict_type = FF_I_TYPE; + avctx->pix_fmt = s->sys->pix_fmt; + avcodec_set_dimensions(avctx, s->sys->width, s->sys->height); + if(avctx->get_buffer(avctx, &s->picture) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + s->picture.interlaced_frame = 1; + s->picture.top_field_first = 0; + + s->buf = buf; + avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, + s->sys->n_difchan * s->sys->difseg_size * 27); + + emms_c(); + + /* return image */ + *data_size = sizeof(AVFrame); + *(AVFrame*)data= s->picture; + + return s->sys->frame_size; +} + +static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, + void *data) +{ + DVVideoContext *s = c->priv_data; + + s->sys = dv_codec_profile(c); + if (!s->sys) + return -1; + if(buf_size < s->sys->frame_size) + return -1; + + c->pix_fmt = s->sys->pix_fmt; + s->picture = *((AVFrame *)data); + s->picture.key_frame = 1; + s->picture.pict_type = FF_I_TYPE; + + s->buf = buf; + c->execute(c, dv_encode_mt, 
(void**)&dv_anchor[0], NULL, + s->sys->n_difchan * s->sys->difseg_size * 27); + + emms_c(); + + /* Fill in just enough of the header for dv_frame_profile() to + return the correct result, so that the frame can be decoded + correctly. The rest of the metadata is filled in by the dvvideo + avformat. (this should probably change so that encode_frame() + fills in ALL of the metadata - e.g. for Quicktime-wrapped DV + streams) */ + + /* NTSC/PAL format */ + buf[3] = s->sys->dsf ? 0x80 : 0x00; + + /* 25Mbps or 50Mbps */ + buf[80*5 + 48 + 3] = (s->sys->pix_fmt == PIX_FMT_YUV422P) ? 0x4 : 0x0; + + return s->sys->frame_size; +} + +static int dvvideo_close(AVCodecContext *c) +{ + + return 0; +} + + +#ifdef CONFIG_DVVIDEO_ENCODER +AVCodec dvvideo_encoder = { + "dvvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_DVVIDEO, + sizeof(DVVideoContext), + dvvideo_init, + dvvideo_encode_frame, + dvvideo_close, + NULL, + CODEC_CAP_DR1, + NULL +}; +#endif // CONFIG_DVVIDEO_ENCODER + +AVCodec dvvideo_decoder = { + "dvvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_DVVIDEO, + sizeof(DVVideoContext), + dvvideo_init, + NULL, + dvvideo_close, + dvvideo_decode_frame, + CODEC_CAP_DR1, + NULL +}; diff --git a/mpeg4/src/libavcodec/dvbsub.c b/mpeg4/src/libavcodec/dvbsub.c new file mode 100644 index 0000000000000000000000000000000000000000..1760199085b9b818638a2150ad9f505bcb20f4eb --- /dev/null +++ b/mpeg4/src/libavcodec/dvbsub.c @@ -0,0 +1,443 @@ +/* + * DVB subtitle encoding for ffmpeg + * Copyright (c) 2005 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "avcodec.h" + +typedef struct DVBSubtitleContext { + int hide_state; + int object_version; +} DVBSubtitleContext; + +#define PUTBITS2(val)\ +{\ + bitbuf |= (val) << bitcnt;\ + bitcnt -= 2;\ + if (bitcnt < 0) {\ + bitcnt = 6;\ + *q++ = bitbuf;\ + bitbuf = 0;\ + }\ +} + +static void dvb_encode_rle2(uint8_t **pq, + const uint8_t *bitmap, int linesize, + int w, int h) +{ + uint8_t *q; + unsigned int bitbuf; + int bitcnt; + int x, y, len, x1, v, color; + + q = *pq; + + for(y = 0; y < h; y++) { + *q++ = 0x10; + bitbuf = 0; + bitcnt = 6; + + x = 0; + while (x < w) { + x1 = x; + color = bitmap[x1++]; + while (x1 < w && bitmap[x1] == color) + x1++; + len = x1 - x; + if (color == 0 && len == 2) { + PUTBITS2(0); + PUTBITS2(0); + PUTBITS2(1); + } else if (len >= 3 && len <= 10) { + v = len - 3; + PUTBITS2(0); + PUTBITS2((v >> 2) | 2); + PUTBITS2(v & 3); + PUTBITS2(color); + } else if (len >= 12 && len <= 27) { + v = len - 12; + PUTBITS2(0); + PUTBITS2(0); + PUTBITS2(2); + PUTBITS2(v >> 2); + PUTBITS2(v & 3); + PUTBITS2(color); + } else if (len >= 29) { + /* length = 29 ... 
284 */ + if (len > 284) + len = 284; + v = len - 29; + PUTBITS2(0); + PUTBITS2(0); + PUTBITS2(3); + PUTBITS2((v >> 6)); + PUTBITS2((v >> 4) & 3); + PUTBITS2((v >> 2) & 3); + PUTBITS2(v & 3); + PUTBITS2(color); + } else { + PUTBITS2(color); + if (color == 0) { + PUTBITS2(1); + } + len = 1; + } + x += len; + } + /* end of line */ + PUTBITS2(0); + PUTBITS2(0); + PUTBITS2(0); + if (bitcnt != 6) { + *q++ = bitbuf; + } + *q++ = 0xf0; + bitmap += linesize; + } + *pq = q; +} + +#define PUTBITS4(val)\ +{\ + bitbuf |= (val) << bitcnt;\ + bitcnt -= 4;\ + if (bitcnt < 0) {\ + bitcnt = 4;\ + *q++ = bitbuf;\ + bitbuf = 0;\ + }\ +} + +/* some DVB decoders only implement 4 bits/pixel */ +static void dvb_encode_rle4(uint8_t **pq, + const uint8_t *bitmap, int linesize, + int w, int h) +{ + uint8_t *q; + unsigned int bitbuf; + int bitcnt; + int x, y, len, x1, v, color; + + q = *pq; + + for(y = 0; y < h; y++) { + *q++ = 0x11; + bitbuf = 0; + bitcnt = 4; + + x = 0; + while (x < w) { + x1 = x; + color = bitmap[x1++]; + while (x1 < w && bitmap[x1] == color) + x1++; + len = x1 - x; + if (color == 0 && len == 2) { + PUTBITS4(0); + PUTBITS4(0xd); + } else if (color == 0 && (len >= 3 && len <= 9)) { + PUTBITS4(0); + PUTBITS4(len - 2); + } else if (len >= 4 && len <= 7) { + PUTBITS4(0); + PUTBITS4(8 + len - 4); + PUTBITS4(color); + } else if (len >= 9 && len <= 24) { + PUTBITS4(0); + PUTBITS4(0xe); + PUTBITS4(len - 9); + PUTBITS4(color); + } else if (len >= 25) { + if (len > 280) + len = 280; + v = len - 25; + PUTBITS4(0); + PUTBITS4(0xf); + PUTBITS4(v >> 4); + PUTBITS4(v & 0xf); + PUTBITS4(color); + } else { + PUTBITS4(color); + if (color == 0) { + PUTBITS4(0xc); + } + len = 1; + } + x += len; + } + /* end of line */ + PUTBITS4(0); + PUTBITS4(0); + if (bitcnt != 4) { + *q++ = bitbuf; + } + *q++ = 0xf0; + bitmap += linesize; + } + *pq = q; +} + +#define SCALEBITS 10 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1<> SCALEBITS) + +#define RGB_TO_U_CCIR(r1, g1, b1, 
shift)\ +(((- FIX(0.16874*224.0/255.0) * r1 - FIX(0.33126*224.0/255.0) * g1 + \ + FIX(0.50000*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +#define RGB_TO_V_CCIR(r1, g1, b1, shift)\ +(((FIX(0.50000*224.0/255.0) * r1 - FIX(0.41869*224.0/255.0) * g1 - \ + FIX(0.08131*224.0/255.0) * b1 + (ONE_HALF << shift) - 1) >> (SCALEBITS + shift)) + 128) + +static inline void putbe16(uint8_t **pq, uint16_t v) +{ + uint8_t *q; + q = *pq; + *q++ = v >> 8; + *q++ = v; + *pq = q; +} + +static int encode_dvb_subtitles(DVBSubtitleContext *s, + uint8_t *outbuf, AVSubtitle *h) +{ + uint8_t *q, *pseg_len; + int page_id, region_id, clut_id, object_id, i, bpp_index, page_state; + + + q = outbuf; + + page_id = 1; + + if (h->num_rects == 0 || h->rects == NULL) + return -1; + + *q++ = 0x00; /* subtitle_stream_id */ + + /* page composition segment */ + + *q++ = 0x0f; /* sync_byte */ + *q++ = 0x10; /* segment_type */ + putbe16(&q, page_id); + pseg_len = q; + q += 2; /* segment length */ + *q++ = 30; /* page_timeout (seconds) */ + if (s->hide_state) + page_state = 0; /* normal case */ + else + page_state = 2; /* mode change */ + /* page_version = 0 + page_state */ + *q++ = s->object_version | (page_state << 2) | 3; + + for (region_id = 0; region_id < h->num_rects; region_id++) { + *q++ = region_id; + *q++ = 0xff; /* reserved */ + putbe16(&q, h->rects[region_id].x); /* left pos */ + putbe16(&q, h->rects[region_id].y); /* top pos */ + } + + putbe16(&pseg_len, q - pseg_len - 2); + + if (!s->hide_state) { + for (clut_id = 0; clut_id < h->num_rects; clut_id++) { + + /* CLUT segment */ + + if (h->rects[clut_id].nb_colors <= 4) { + /* 2 bpp, some decoders do not support it correctly */ + bpp_index = 0; + } else if (h->rects[clut_id].nb_colors <= 16) { + /* 4 bpp, standard encoding */ + bpp_index = 1; + } else { + return -1; + } + + *q++ = 0x0f; /* sync byte */ + *q++ = 0x12; /* CLUT definition segment */ + putbe16(&q, page_id); + pseg_len = q; + q += 2; /* segment length 
*/ + *q++ = clut_id; + *q++ = (0 << 4) | 0xf; /* version = 0 */ + + for(i = 0; i < h->rects[clut_id].nb_colors; i++) { + *q++ = i; /* clut_entry_id */ + *q++ = (1 << (7 - bpp_index)) | (0xf << 1) | 1; /* 2 bits/pixel full range */ + { + int a, r, g, b; + a = (h->rects[clut_id].rgba_palette[i] >> 24) & 0xff; + r = (h->rects[clut_id].rgba_palette[i] >> 16) & 0xff; + g = (h->rects[clut_id].rgba_palette[i] >> 8) & 0xff; + b = (h->rects[clut_id].rgba_palette[i] >> 0) & 0xff; + + *q++ = RGB_TO_Y_CCIR(r, g, b); + *q++ = RGB_TO_V_CCIR(r, g, b, 0); + *q++ = RGB_TO_U_CCIR(r, g, b, 0); + *q++ = 255 - a; + } + } + + putbe16(&pseg_len, q - pseg_len - 2); + } + } + + for (region_id = 0; region_id < h->num_rects; region_id++) { + + /* region composition segment */ + + if (h->rects[region_id].nb_colors <= 4) { + /* 2 bpp, some decoders do not support it correctly */ + bpp_index = 0; + } else if (h->rects[region_id].nb_colors <= 16) { + /* 4 bpp, standard encoding */ + bpp_index = 1; + } else { + return -1; + } + + *q++ = 0x0f; /* sync_byte */ + *q++ = 0x11; /* segment_type */ + putbe16(&q, page_id); + pseg_len = q; + q += 2; /* segment length */ + *q++ = region_id; + *q++ = (s->object_version << 4) | (0 << 3) | 0x07; /* version , no fill */ + putbe16(&q, h->rects[region_id].w); /* region width */ + putbe16(&q, h->rects[region_id].h); /* region height */ + *q++ = ((1 + bpp_index) << 5) | ((1 + bpp_index) << 2) | 0x03; + *q++ = region_id; /* clut_id == region_id */ + *q++ = 0; /* 8 bit fill colors */ + *q++ = 0x03; /* 4 bit and 2 bit fill colors */ + + if (!s->hide_state) { + putbe16(&q, region_id); /* object_id == region_id */ + *q++ = (0 << 6) | (0 << 4); + *q++ = 0; + *q++ = 0xf0; + *q++ = 0; + } + + putbe16(&pseg_len, q - pseg_len - 2); + } + + if (!s->hide_state) { + + for (object_id = 0; object_id < h->num_rects; object_id++) { + /* Object Data segment */ + + if (h->rects[object_id].nb_colors <= 4) { + /* 2 bpp, some decoders do not support it correctly */ + bpp_index = 0; + } 
else if (h->rects[object_id].nb_colors <= 16) { + /* 4 bpp, standard encoding */ + bpp_index = 1; + } else { + return -1; + } + + *q++ = 0x0f; /* sync byte */ + *q++ = 0x13; + putbe16(&q, page_id); + pseg_len = q; + q += 2; /* segment length */ + + putbe16(&q, object_id); + *q++ = (s->object_version << 4) | (0 << 2) | (0 << 1) | 1; /* version = 0, + onject_coding_method, + non_modifying_color_flag */ + { + uint8_t *ptop_field_len, *pbottom_field_len, *top_ptr, *bottom_ptr; + void (*dvb_encode_rle)(uint8_t **pq, + const uint8_t *bitmap, int linesize, + int w, int h); + ptop_field_len = q; + q += 2; + pbottom_field_len = q; + q += 2; + + if (bpp_index == 0) + dvb_encode_rle = dvb_encode_rle2; + else + dvb_encode_rle = dvb_encode_rle4; + + top_ptr = q; + dvb_encode_rle(&q, h->rects[object_id].bitmap, h->rects[object_id].w * 2, + h->rects[object_id].w, h->rects[object_id].h >> 1); + bottom_ptr = q; + dvb_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w, + h->rects[object_id].w * 2, h->rects[object_id].w, + h->rects[object_id].h >> 1); + + putbe16(&ptop_field_len, bottom_ptr - top_ptr); + putbe16(&pbottom_field_len, q - bottom_ptr); + } + + putbe16(&pseg_len, q - pseg_len - 2); + } + } + + /* end of display set segment */ + + *q++ = 0x0f; /* sync_byte */ + *q++ = 0x80; /* segment_type */ + putbe16(&q, page_id); + pseg_len = q; + q += 2; /* segment length */ + + putbe16(&pseg_len, q - pseg_len - 2); + + *q++ = 0xff; /* end of PES data */ + + s->object_version = (s->object_version + 1) & 0xf; + s->hide_state = !s->hide_state; + return q - outbuf; +} + +static int dvbsub_init_decoder(AVCodecContext *avctx) +{ + return 0; +} + +static int dvbsub_close_decoder(AVCodecContext *avctx) +{ + return 0; +} + +static int dvbsub_encode(AVCodecContext *avctx, + unsigned char *buf, int buf_size, void *data) +{ + DVBSubtitleContext *s = avctx->priv_data; + AVSubtitle *sub = data; + int ret; + + ret = encode_dvb_subtitles(s, buf, sub); + return ret; +} + +AVCodec 
dvbsub_encoder = { + "dvbsub", + CODEC_TYPE_SUBTITLE, + CODEC_ID_DVB_SUBTITLE, + sizeof(DVBSubtitleContext), + dvbsub_init_decoder, + dvbsub_encode, + dvbsub_close_decoder, +}; diff --git a/mpeg4/src/libavcodec/dvbsubdec.c b/mpeg4/src/libavcodec/dvbsubdec.c new file mode 100644 index 0000000000000000000000000000000000000000..525f7c23e160ecd6d7ac4b61f1845f1e32a004cd --- /dev/null +++ b/mpeg4/src/libavcodec/dvbsubdec.c @@ -0,0 +1,1631 @@ +/* + * DVB subtitle decoding for ffmpeg + * Copyright (c) 2005 Ian Caulfield. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "avcodec.h" +#include "dsputil.h" +#include "bitstream.h" + +//#define DEBUG +//#define DEBUG_PACKET_CONTENTS +//#define DEBUG_SAVE_IMAGES + +#define DVBSUB_PAGE_SEGMENT 0x10 +#define DVBSUB_REGION_SEGMENT 0x11 +#define DVBSUB_CLUT_SEGMENT 0x12 +#define DVBSUB_OBJECT_SEGMENT 0x13 +#define DVBSUB_DISPLAY_SEGMENT 0x80 + +static unsigned char *cm; + +#ifdef DEBUG_SAVE_IMAGES +#undef fprintf +#if 0 +static void png_save(const char *filename, uint8_t *bitmap, int w, int h, + uint32_t *rgba_palette) +{ + int x, y, v; + FILE *f; + char fname[40], fname2[40]; + char command[1024]; + + snprintf(fname, 40, "%s.ppm", filename); + + f = fopen(fname, "w"); + if (!f) { + perror(fname); + exit(1); + } + fprintf(f, "P6\n" + "%d %d\n" + "%d\n", + w, h, 255); + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + v = rgba_palette[bitmap[y * w + x]]; + putc((v >> 16) & 0xff, f); + putc((v >> 8) & 0xff, f); + putc((v >> 0) & 0xff, f); + } + } + fclose(f); + + + snprintf(fname2, 40, "%s-a.pgm", filename); + + f = fopen(fname2, "w"); + if (!f) { + perror(fname2); + exit(1); + } + fprintf(f, "P5\n" + "%d %d\n" + "%d\n", + w, h, 255); + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + v = rgba_palette[bitmap[y * w + x]]; + putc((v >> 24) & 0xff, f); + } + } + fclose(f); + + snprintf(command, 1024, "pnmtopng -alpha %s %s > %s.png 2> /dev/null", fname2, fname, filename); + system(command); + + snprintf(command, 1024, "rm %s %s", fname, fname2); + system(command); +} +#endif + +static void png_save2(const char *filename, uint32_t *bitmap, int w, int h) +{ + int x, y, v; + FILE *f; + char fname[40], fname2[40]; + char command[1024]; + + snprintf(fname, 40, "%s.ppm", filename); + + f = fopen(fname, "w"); + if (!f) { + perror(fname); + exit(1); + } + 
fprintf(f, "P6\n" + "%d %d\n" + "%d\n", + w, h, 255); + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + v = bitmap[y * w + x]; + putc((v >> 16) & 0xff, f); + putc((v >> 8) & 0xff, f); + putc((v >> 0) & 0xff, f); + } + } + fclose(f); + + + snprintf(fname2, 40, "%s-a.pgm", filename); + + f = fopen(fname2, "w"); + if (!f) { + perror(fname2); + exit(1); + } + fprintf(f, "P5\n" + "%d %d\n" + "%d\n", + w, h, 255); + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + v = bitmap[y * w + x]; + putc((v >> 24) & 0xff, f); + } + } + fclose(f); + + snprintf(command, 1024, "pnmtopng -alpha %s %s > %s.png 2> /dev/null", fname2, fname, filename); + system(command); + + snprintf(command, 1024, "rm %s %s", fname, fname2); + system(command); +} +#endif + +#define RGBA(r,g,b,a) (((a) << 24) | ((r) << 16) | ((g) << 8) | (b)) + +typedef struct DVBSubCLUT { + int id; + + uint32_t clut4[4]; + uint32_t clut16[16]; + uint32_t clut256[256]; + + struct DVBSubCLUT *next; +} DVBSubCLUT; + +static DVBSubCLUT default_clut; + +typedef struct DVBSubObjectDisplay { + int object_id; + int region_id; + + int x_pos; + int y_pos; + + int fgcolour; + int bgcolour; + + struct DVBSubObjectDisplay *region_list_next; + struct DVBSubObjectDisplay *object_list_next; +} DVBSubObjectDisplay; + +typedef struct DVBSubObject { + int id; + + int type; + + DVBSubObjectDisplay *display_list; + + struct DVBSubObject *next; +} DVBSubObject; + +typedef struct DVBSubRegionDisplay { + int region_id; + + int x_pos; + int y_pos; + + struct DVBSubRegionDisplay *next; +} DVBSubRegionDisplay; + +typedef struct DVBSubRegion { + int id; + + int width; + int height; + int depth; + + int clut; + int bgcolour; + + uint8_t *pbuf; + int buf_size; + + DVBSubObjectDisplay *display_list; + + struct DVBSubRegion *next; +} DVBSubRegion; + +typedef struct DVBSubContext { + int composition_id; + int ancillary_id; + + int time_out; + DVBSubRegion *region_list; + DVBSubCLUT *clut_list; + DVBSubObject *object_list; + + int 
display_list_size; + DVBSubRegionDisplay *display_list; +} DVBSubContext; + + +static DVBSubObject* get_object(DVBSubContext *ctx, int object_id) +{ + DVBSubObject *ptr = ctx->object_list; + + while (ptr != NULL && ptr->id != object_id) { + ptr = ptr->next; + } + + return ptr; +} + +static DVBSubCLUT* get_clut(DVBSubContext *ctx, int clut_id) +{ + DVBSubCLUT *ptr = ctx->clut_list; + + while (ptr != NULL && ptr->id != clut_id) { + ptr = ptr->next; + } + + return ptr; +} + +static DVBSubRegion* get_region(DVBSubContext *ctx, int region_id) +{ + DVBSubRegion *ptr = ctx->region_list; + + while (ptr != NULL && ptr->id != region_id) { + ptr = ptr->next; + } + + return ptr; +} + +static void delete_region_display_list(DVBSubContext *ctx, DVBSubRegion *region) +{ + DVBSubObject *object, *obj2, **obj2_ptr; + DVBSubObjectDisplay *display, *obj_disp, **obj_disp_ptr; + + while (region->display_list != NULL) { + display = region->display_list; + + object = get_object(ctx, display->object_id); + + if (object != NULL) { + obj_disp = object->display_list; + obj_disp_ptr = &object->display_list; + + while (obj_disp != NULL && obj_disp != display) { + obj_disp_ptr = &obj_disp->object_list_next; + obj_disp = obj_disp->object_list_next; + } + + if (obj_disp) { + *obj_disp_ptr = obj_disp->object_list_next; + + if (object->display_list == NULL) { + obj2 = ctx->object_list; + obj2_ptr = &ctx->object_list; + + while (obj2 != NULL && obj2 != object) { + obj2_ptr = &obj2->next; + obj2 = obj2->next; + } + + *obj2_ptr = obj2->next; + + av_free(obj2); + } + } + } + + region->display_list = display->region_list_next; + + av_free(display); + } + +} + +static void delete_state(DVBSubContext *ctx) +{ + DVBSubRegion *region; + DVBSubCLUT *clut; + + while (ctx->region_list != NULL) + { + region = ctx->region_list; + + ctx->region_list = region->next; + + delete_region_display_list(ctx, region); + if (region->pbuf != NULL) + av_free(region->pbuf); + + av_free(region); + } + + while (ctx->clut_list != 
NULL) + { + clut = ctx->clut_list; + + ctx->clut_list = clut->next; + + av_free(clut); + } + + /* Should already be null */ + if (ctx->object_list != NULL) + av_log(0, AV_LOG_ERROR, "Memory deallocation error!\n"); +} + +static int dvbsub_init_decoder(AVCodecContext *avctx) +{ + int i, r, g, b, a = 0; + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + cm = cropTbl + MAX_NEG_CROP; + + memset(avctx->priv_data, 0, sizeof(DVBSubContext)); + + ctx->composition_id = avctx->sub_id & 0xffff; + ctx->ancillary_id = avctx->sub_id >> 16; + + default_clut.id = -1; + default_clut.next = NULL; + + default_clut.clut4[0] = RGBA( 0, 0, 0, 0); + default_clut.clut4[1] = RGBA(255, 255, 255, 255); + default_clut.clut4[2] = RGBA( 0, 0, 0, 255); + default_clut.clut4[3] = RGBA(127, 127, 127, 255); + + default_clut.clut16[0] = RGBA( 0, 0, 0, 0); + for (i = 1; i < 16; i++) { + if (i < 8) { + r = (i & 1) ? 255 : 0; + g = (i & 2) ? 255 : 0; + b = (i & 4) ? 255 : 0; + } else { + r = (i & 1) ? 127 : 0; + g = (i & 2) ? 127 : 0; + b = (i & 4) ? 127 : 0; + } + default_clut.clut16[i] = RGBA(r, g, b, 255); + } + + default_clut.clut256[0] = RGBA( 0, 0, 0, 0); + for (i = 1; i < 256; i++) { + if (i < 8) { + r = (i & 1) ? 255 : 0; + g = (i & 2) ? 255 : 0; + b = (i & 4) ? 255 : 0; + a = 63; + } else { + switch (i & 0x88) { + case 0x00: + r = ((i & 1) ? 85 : 0) + ((i & 0x10) ? 170 : 0); + g = ((i & 2) ? 85 : 0) + ((i & 0x20) ? 170 : 0); + b = ((i & 4) ? 85 : 0) + ((i & 0x40) ? 170 : 0); + a = 255; + break; + case 0x08: + r = ((i & 1) ? 85 : 0) + ((i & 0x10) ? 170 : 0); + g = ((i & 2) ? 85 : 0) + ((i & 0x20) ? 170 : 0); + b = ((i & 4) ? 85 : 0) + ((i & 0x40) ? 170 : 0); + a = 127; + break; + case 0x80: + r = 127 + ((i & 1) ? 43 : 0) + ((i & 0x10) ? 85 : 0); + g = 127 + ((i & 2) ? 43 : 0) + ((i & 0x20) ? 85 : 0); + b = 127 + ((i & 4) ? 43 : 0) + ((i & 0x40) ? 85 : 0); + a = 255; + break; + case 0x88: + r = ((i & 1) ? 43 : 0) + ((i & 0x10) ? 85 : 0); + g = ((i & 2) ? 43 : 0) + ((i & 0x20) ? 
85 : 0); + b = ((i & 4) ? 43 : 0) + ((i & 0x40) ? 85 : 0); + a = 255; + break; + } + } + default_clut.clut256[i] = RGBA(r, g, b, a); + } + + return 0; +} + +static int dvbsub_close_decoder(AVCodecContext *avctx) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + DVBSubRegionDisplay *display; + + delete_state(ctx); + + while (ctx->display_list != NULL) + { + display = ctx->display_list; + ctx->display_list = display->next; + + av_free(display); + } + + return 0; +} + +static int dvbsub_read_2bit_string(uint8_t *destbuf, int dbuf_len, + uint8_t **srcbuf, int buf_size, + int non_mod, uint8_t *map_table) +{ + GetBitContext gb; + + int bits; + int run_length; + int pixels_read = 0; + + init_get_bits(&gb, *srcbuf, buf_size << 8); + + while (get_bits_count(&gb) < (buf_size << 8) && pixels_read < dbuf_len) { + bits = get_bits(&gb, 2); + + if (bits != 0) { + if (non_mod != 1 || bits != 1) { + if (map_table != NULL) + *destbuf++ = map_table[bits]; + else + *destbuf++ = bits; + } + pixels_read++; + } else { + bits = get_bits(&gb, 1); + if (bits == 1) { + run_length = get_bits(&gb, 3) + 3; + bits = get_bits(&gb, 2); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else { + bits = get_bits(&gb, 1); + if (bits == 0) { + bits = get_bits(&gb, 2); + if (bits == 2) { + run_length = get_bits(&gb, 4) + 12; + bits = get_bits(&gb, 2); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else if (bits == 3) { + run_length = get_bits(&gb, 8) + 29; + bits = get_bits(&gb, 2); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && 
pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else if (bits == 1) { + pixels_read += 2; + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + if (pixels_read <= dbuf_len) { + *destbuf++ = bits; + *destbuf++ = bits; + } + } else { + (*srcbuf) += (get_bits_count(&gb) + 7) >> 3; + return pixels_read; + } + } else { + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + *destbuf++ = bits; + pixels_read++; + } + } + } + } + + if (get_bits(&gb, 6) != 0) + av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n"); + + (*srcbuf) += (get_bits_count(&gb) + 7) >> 3; + + return pixels_read; +} + +static int dvbsub_read_4bit_string(uint8_t *destbuf, int dbuf_len, + uint8_t **srcbuf, int buf_size, + int non_mod, uint8_t *map_table) +{ + GetBitContext gb; + + int bits; + int run_length; + int pixels_read = 0; + + init_get_bits(&gb, *srcbuf, buf_size << 8); + + while (get_bits_count(&gb) < (buf_size << 8) && pixels_read < dbuf_len) { + bits = get_bits(&gb, 4); + + if (bits != 0) { + if (non_mod != 1 || bits != 1) { + if (map_table != NULL) + *destbuf++ = map_table[bits]; + else + *destbuf++ = bits; + } + pixels_read++; + } else { + bits = get_bits(&gb, 1); + if (bits == 0) { + run_length = get_bits(&gb, 3); + + if (run_length == 0) { + (*srcbuf) += (get_bits_count(&gb) + 7) >> 3; + return pixels_read; + } + + run_length += 2; + + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } else { + bits = get_bits(&gb, 1); + if (bits == 0) { + run_length = get_bits(&gb, 2) + 4; + bits = get_bits(&gb, 4); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else { + bits = get_bits(&gb, 2); + if (bits == 2) { + run_length = get_bits(&gb, 4) + 9; + 
bits = get_bits(&gb, 4); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else if (bits == 3) { + run_length = get_bits(&gb, 8) + 25; + bits = get_bits(&gb, 4); + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + else { + if (map_table != NULL) + bits = map_table[bits]; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } else if (bits == 1) { + pixels_read += 2; + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + if (pixels_read <= dbuf_len) { + *destbuf++ = bits; + *destbuf++ = bits; + } + } else { + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + *destbuf++ = bits; + pixels_read ++; + } + } + } + } + } + + if (get_bits(&gb, 8) != 0) + av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n"); + + (*srcbuf) += (get_bits_count(&gb) + 7) >> 3; + + return pixels_read; +} + +static int dvbsub_read_8bit_string(uint8_t *destbuf, int dbuf_len, + uint8_t **srcbuf, int buf_size, + int non_mod, uint8_t *map_table) +{ + uint8_t *sbuf_end = (*srcbuf) + buf_size; + int bits; + int run_length; + int pixels_read = 0; + + while (*srcbuf < sbuf_end && pixels_read < dbuf_len) { + bits = *(*srcbuf)++; + + if (bits != 0) { + if (non_mod != 1 || bits != 1) { + if (map_table != NULL) + *destbuf++ = map_table[bits]; + else + *destbuf++ = bits; + } + pixels_read++; + } else { + bits = *(*srcbuf)++; + run_length = bits & 0x7f; + if ((bits & 0x80) == 0) { + if (run_length == 0) { + return pixels_read; + } + + if (map_table != NULL) + bits = map_table[0]; + else + bits = 0; + while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } else { + bits = *(*srcbuf)++; + + if (non_mod == 1 && bits == 1) + pixels_read += run_length; + if (map_table != NULL) + bits = 
map_table[bits]; + else while (run_length-- > 0 && pixels_read < dbuf_len) { + *destbuf++ = bits; + pixels_read++; + } + } + } + } + + if (*(*srcbuf)++ != 0) + av_log(0, AV_LOG_ERROR, "DVBSub error: line overflow\n"); + + return pixels_read; +} + + + +static void dvbsub_parse_pixel_data_block(AVCodecContext *avctx, DVBSubObjectDisplay *display, + uint8_t *buf, int buf_size, int top_bottom, int non_mod) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + DVBSubRegion *region = get_region(ctx, display->region_id); + uint8_t *buf_end = buf + buf_size; + uint8_t *pbuf; + int x_pos, y_pos; + int i; + + uint8_t map2to4[] = { 0x0, 0x7, 0x8, 0xf}; + uint8_t map2to8[] = {0x00, 0x77, 0x88, 0xff}; + uint8_t map4to8[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, + 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}; + uint8_t *map_table; + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "DVB pixel block size %d, %s field:\n", buf_size, + top_bottom ? "bottom" : "top"); +#endif + +#ifdef DEBUG_PACKET_CONTENTS + for (i = 0; i < buf_size; i++) + { + if (i % 16 == 0) + av_log(avctx, AV_LOG_INFO, "0x%08p: ", buf+i); + + av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + if (i % 16 == 15) + av_log(avctx, AV_LOG_INFO, "\n"); + } + + if (i % 16 != 0) + av_log(avctx, AV_LOG_INFO, "\n"); + +#endif + + if (region == 0) + return; + + pbuf = region->pbuf; + + x_pos = display->x_pos; + y_pos = display->y_pos; + + if ((y_pos & 1) != top_bottom) + y_pos++; + + while (buf < buf_end) { + if (x_pos > region->width || y_pos > region->height) { + av_log(avctx, AV_LOG_ERROR, "Invalid object location!\n"); + return; + } + + switch (*buf++) { + case 0x10: + if (region->depth == 8) + map_table = map2to8; + else if (region->depth == 4) + map_table = map2to4; + else + map_table = NULL; + + x_pos += dvbsub_read_2bit_string(pbuf + (y_pos * region->width) + x_pos, + region->width - x_pos, &buf, buf_size, + non_mod, map_table); + break; + case 0x11: + if (region->depth < 4) { + av_log(avctx, 
AV_LOG_ERROR, "4-bit pixel string in %d-bit region!\n", region->depth); + return; + } + + if (region->depth == 8) + map_table = map4to8; + else + map_table = NULL; + + x_pos += dvbsub_read_4bit_string(pbuf + (y_pos * region->width) + x_pos, + region->width - x_pos, &buf, buf_size, + non_mod, map_table); + break; + case 0x12: + if (region->depth < 8) { + av_log(avctx, AV_LOG_ERROR, "8-bit pixel string in %d-bit region!\n", region->depth); + return; + } + + x_pos += dvbsub_read_8bit_string(pbuf + (y_pos * region->width) + x_pos, + region->width - x_pos, &buf, buf_size, + non_mod, NULL); + break; + + case 0x20: + map2to4[0] = (*buf) >> 4; + map2to4[1] = (*buf++) & 0xf; + map2to4[2] = (*buf) >> 4; + map2to4[3] = (*buf++) & 0xf; + break; + case 0x21: + for (i = 0; i < 4; i++) + map2to8[i] = *buf++; + break; + case 0x22: + for (i = 0; i < 16; i++) + map4to8[i] = *buf++; + break; + + case 0xf0: + x_pos = display->x_pos; + y_pos += 2; + break; + default: + av_log(avctx, AV_LOG_INFO, "Unknown/unsupported pixel block 0x%x\n", *(buf-1)); + } + } + +} + +static void dvbsub_parse_object_segment(AVCodecContext *avctx, + uint8_t *buf, int buf_size) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + uint8_t *buf_end = buf + buf_size; + uint8_t *block; + int object_id; + DVBSubObject *object; + DVBSubObjectDisplay *display; + int top_field_len, bottom_field_len; + + int coding_method, non_modifying_colour; + + object_id = BE_16(buf); + buf += 2; + + object = get_object(ctx, object_id); + + if (!object) + return; + + coding_method = ((*buf) >> 2) & 3; + non_modifying_colour = ((*buf++) >> 1) & 1; + + if (coding_method == 0) { + top_field_len = BE_16(buf); + buf += 2; + bottom_field_len = BE_16(buf); + buf += 2; + + if (buf + top_field_len + bottom_field_len > buf_end) { + av_log(avctx, AV_LOG_ERROR, "Field data size too large\n"); + return; + } + + for (display = object->display_list; display != 0; display = display->object_list_next) { + block = buf; + + 
dvbsub_parse_pixel_data_block(avctx, display, block, top_field_len, 0, + non_modifying_colour); + + if (bottom_field_len > 0) + block = buf + top_field_len; + else + bottom_field_len = top_field_len; + + dvbsub_parse_pixel_data_block(avctx, display, block, bottom_field_len, 1, + non_modifying_colour); + } + +/* } else if (coding_method == 1) {*/ + + } else { + av_log(avctx, AV_LOG_ERROR, "Unknown object coding %d\n", coding_method); + } + +} + +#define SCALEBITS 10 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1<> SCALEBITS];\ + g = cm[(y + g_add) >> SCALEBITS];\ + b = cm[(y + b_add) >> SCALEBITS];\ +} + + +static void dvbsub_parse_clut_segment(AVCodecContext *avctx, + uint8_t *buf, int buf_size) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + uint8_t *buf_end = buf + buf_size; + int clut_id; + DVBSubCLUT *clut; + int entry_id, depth , full_range; + int y, cr, cb, alpha; + int r, g, b, r_add, g_add, b_add; + +#ifdef DEBUG_PACKET_CONTENTS + int i; + + av_log(avctx, AV_LOG_INFO, "DVB clut packet:\n"); + + for (i=0; i < buf_size; i++) + { + av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + if (i % 16 == 15) + av_log(avctx, AV_LOG_INFO, "\n"); + } + + if (i % 16 != 0) + av_log(avctx, AV_LOG_INFO, "\n"); + +#endif + + clut_id = *buf++; + buf += 1; + + clut = get_clut(ctx, clut_id); + + if (clut == NULL) { + clut = av_malloc(sizeof(DVBSubCLUT)); + + memcpy(clut, &default_clut, sizeof(DVBSubCLUT)); + + clut->id = clut_id; + + clut->next = ctx->clut_list; + ctx->clut_list = clut; + } + + while (buf + 4 < buf_end) + { + entry_id = *buf++; + + depth = (*buf) & 0xe0; + + if (depth == 0) { + av_log(avctx, AV_LOG_ERROR, "Invalid clut depth 0x%x!\n", *buf); + return; + } + + full_range = (*buf++) & 1; + + if (full_range) { + y = *buf++; + cr = *buf++; + cb = *buf++; + alpha = *buf++; + } else { + y = buf[0] & 0xfc; + cr = (((buf[0] & 3) << 2) | ((buf[1] >> 6) & 3)) << 4; + cb = (buf[1] << 2) & 0xf0; + alpha = (buf[1] << 6) & 0xc0; + + buf 
+= 2; + } + + if (y == 0) /* y == 0 forces alpha to 0xff, so the stored 255 - alpha becomes 0 */ + alpha = 0xff; + + YUV_TO_RGB1_CCIR(cb, cr); + YUV_TO_RGB2_CCIR(r, g, b, y); + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "clut %d := (%d,%d,%d,%d)\n", entry_id, r, g, b, alpha); +#endif + + if (depth & 0x80) + clut->clut4[entry_id] = RGBA(r,g,b,255 - alpha); + if (depth & 0x40) + clut->clut16[entry_id] = RGBA(r,g,b,255 - alpha); + if (depth & 0x20) + clut->clut256[entry_id] = RGBA(r,g,b,255 - alpha); + } +} + +/* Parse a region composition segment. Segments shorter than 10 bytes are ignored. Finds the DVBSubRegion for the id, or creates a zero-initialised one and pushes it onto ctx->region_list; reads width, height, depth, CLUT id and background colour; replaces the pixel buffer when width * height differs from the stored buf_size; fills the buffer with the background colour when requested or when it was just replaced; drops the region's previous display list with delete_region_display_list(); then links one DVBSubObjectDisplay per listed object into both the region's and the object's display lists. NOTE(review): the av_mallocz() and av_malloc() results are used unchecked. */ +static void dvbsub_parse_region_segment(AVCodecContext *avctx, + uint8_t *buf, int buf_size) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + uint8_t *buf_end = buf + buf_size; + int region_id, object_id; + DVBSubRegion *region; + DVBSubObject *object; + DVBSubObjectDisplay *display; + int fill; + + if (buf_size < 10) + return; + + region_id = *buf++; + + region = get_region(ctx, region_id); + + if (region == NULL) + { + region = av_mallocz(sizeof(DVBSubRegion)); + + region->id = region_id; + + region->next = ctx->region_list; + ctx->region_list = region; + } + + fill = ((*buf++) >> 3) & 1; /* bit 3 of this byte requests a fill with the background colour (see the memset below) */ + + region->width = BE_16(buf); + buf += 2; + region->height = BE_16(buf); + buf += 2; + + if (region->width * region->height != region->buf_size) { /* size changed or first use: replace the pixel buffer and force a fill */ + if (region->pbuf != 0) + av_free(region->pbuf); + + region->buf_size = region->width * region->height; + + region->pbuf = av_malloc(region->buf_size); + + fill = 1; + } + + region->depth = 1 << (((*buf++) >> 2) & 7); /* 3-bit code n is stored as 1 << n; the code below compares it with 8 and 4 */ + region->clut = *buf++; + + if (region->depth == 8) /* NOTE(review): this branch consumes one byte while the else branch consumes two - confirm the object loop that follows starts at the right offset for depth 8 */ + region->bgcolour = *buf++; + else { + buf += 1; + + if (region->depth == 4) + region->bgcolour = (((*buf++) >> 4) & 15); + else + region->bgcolour = (((*buf++) >> 2) & 3); + } + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Region %d, (%dx%d)\n", region_id, region->width, region->height); +#endif + + if (fill) { + memset(region->pbuf, region->bgcolour, region->buf_size); +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Fill region (%d)\n", region->bgcolour); +#endif + } + + delete_region_display_list(ctx, region); + + 
while (buf + 5 < buf_end) { /* one object reference per iteration */ + object_id = BE_16(buf); + buf += 2; + + object = get_object(ctx, object_id); + + if (object == NULL) { + object = av_mallocz(sizeof(DVBSubObject)); + + object->id = object_id; + object->next = ctx->object_list; + ctx->object_list = object; + } + + object->type = (*buf) >> 6; /* top two bits of the 16-bit word whose low 12 bits are x_pos */ + + display = av_mallocz(sizeof(DVBSubObjectDisplay)); + + display->object_id = object_id; + display->region_id = region_id; + + display->x_pos = BE_16(buf) & 0xfff; + buf += 2; + display->y_pos = BE_16(buf) & 0xfff; + buf += 2; + + if ((object->type == 1 || object->type == 2) && buf+1 < buf_end) { /* types 1 and 2 carry two extra colour bytes */ + display->fgcolour = *buf++; + display->bgcolour = *buf++; + } + + display->region_list_next = region->display_list; + region->display_list = display; + + display->object_list_next = object->display_list; + object->display_list = display; + } +} +/* Parse a page composition segment. Stores the first byte in ctx->time_out, calls delete_state() when the 2-bit page state equals 2, then rebuilds ctx->display_list from the regions listed in the segment: an entry of the old list with the same region id is unlinked and reused, otherwise a zeroed DVBSubRegionDisplay is allocated. Old entries that were not reused are freed at the end. NOTE(review): the guard only rejects buf_size < 1 although two bytes are read unconditionally, and the av_mallocz() result is used unchecked - confirm. */ +static void dvbsub_parse_page_segment(AVCodecContext *avctx, + uint8_t *buf, int buf_size) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + DVBSubRegionDisplay *display; + DVBSubRegionDisplay *tmp_display_list, **tmp_ptr; + + uint8_t *buf_end = buf + buf_size; + int region_id; + int page_state; + + if (buf_size < 1) + return; + + ctx->time_out = *buf++; + page_state = ((*buf++) >> 2) & 3; + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Page time out %ds, state %d\n", ctx->time_out, page_state); +#endif + + if (page_state == 2) + { + delete_state(ctx); + } + + tmp_display_list = ctx->display_list; /* detach the old list so its entries can be reused below */ + ctx->display_list = NULL; + ctx->display_list_size = 0; + + while (buf + 5 < buf_end) { + region_id = *buf++; + buf += 1; /* skip the byte after the region id */ + + display = tmp_display_list; + tmp_ptr = &tmp_display_list; + + while (display != NULL && display->region_id != region_id) { /* search the old list; tmp_ptr tracks the link that points at display */ + tmp_ptr = &display->next; + display = display->next; + } + + if (display == NULL) + display = av_mallocz(sizeof(DVBSubRegionDisplay)); + + display->region_id = region_id; + + display->x_pos = BE_16(buf); + buf += 2; + display->y_pos = BE_16(buf); + buf += 2; + + *tmp_ptr = 
display->next; /* unlinks a reused entry from the old list; for a newly allocated (zeroed) entry this only rewrites a NULL link */ + + display->next = ctx->display_list; + ctx->display_list = display; + ctx->display_list_size++; + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Region %d, (%d,%d)\n", region_id, display->x_pos, display->y_pos); +#endif + } + + while (tmp_display_list != 0) { /* free the old entries that were not reused */ + display = tmp_display_list; + + tmp_display_list = display->next; + + av_free(display); + } + +} + +/* Debug helper, compiled only when DEBUG_SAVE_IMAGES is defined. Computes the bounding box of all entries in ctx->display_list, renders every region into one buffer of 32-bit pixels through the CLUT table chosen by region->depth (default_clut when the region's CLUT is not found) and passes it to png_save2() under the name dvbs.N, where N is a static counter incremented on every call. NOTE(review): the get_region() and av_malloc() results are used unchecked here, and pixels not covered by any region are presumably left uninitialised - confirm this is acceptable for a debug dump. */ +#ifdef DEBUG_SAVE_IMAGES +static void save_display_set(DVBSubContext *ctx) +{ + DVBSubRegion *region; + DVBSubRegionDisplay *display; + DVBSubCLUT *clut; + uint32_t *clut_table; + int x_pos, y_pos, width, height; + int x, y, y_off, x_off; + uint32_t *pbuf; + char filename[32]; + static int fileno_index = 0; + + x_pos = -1; /* -1 marks that no display entry has been seen yet */ + y_pos = -1; + width = 0; + height = 0; + + for (display = ctx->display_list; display != NULL; display = display->next) { + region = get_region(ctx, display->region_id); + + if (x_pos == -1) { + x_pos = display->x_pos; + y_pos = display->y_pos; + width = region->width; + height = region->height; + } else { /* grow the bounding box so that it also covers this region */ + if (display->x_pos < x_pos) { + width += (x_pos - display->x_pos); + x_pos = display->x_pos; + } + + if (display->y_pos < y_pos) { + height += (y_pos - display->y_pos); + y_pos = display->y_pos; + } + + if (display->x_pos + region->width > x_pos + width) { + width = display->x_pos + region->width - x_pos; + } + + if (display->y_pos + region->height > y_pos + height) { + height = display->y_pos + region->height - y_pos; + } + } + } + + if (x_pos >= 0) { + + pbuf = av_malloc(width * height * 4); + + for (display = ctx->display_list; display != NULL; display = display->next) { + region = get_region(ctx, display->region_id); + + x_off = display->x_pos - x_pos; + y_off = display->y_pos - y_pos; + + clut = get_clut(ctx, region->clut); + + if (clut == 0) + clut = &default_clut; + + switch (region->depth) { + case 2: + clut_table = clut->clut4; + break; + case 8: + clut_table = clut->clut256; + break; + case 4: + default: + clut_table = clut->clut16; + break; + } + + for (y = 0; 
y < region->height; y++) { + for (x = 0; x < region->width; x++) { + pbuf[((y + y_off) * width) + x_off + x] = + clut_table[region->pbuf[y * region->width + x]]; + } + } + + } + + snprintf(filename, 32, "dvbs.%d", fileno_index); + + png_save2(filename, pbuf, width, height); + + av_free(pbuf); + } + + fileno_index++; +} +#endif +/* Handle the end-of-display-set segment by filling *sub from the current display list. One AVSubtitleRect is produced for every display entry whose region still exists: position from the display entry, size and line size from the region, plus freshly allocated copies of the CLUT table selected by region->depth and of the region's pixel buffer. end_display_time is ctx->time_out * 1000. The buf and buf_size parameters are not used. Always returns 1. NOTE(review): nb_colors is fixed at 16 while the palette copy holds 1 << region->depth entries, and the av_mallocz() and av_malloc() results are used unchecked - confirm. */ +static int dvbsub_display_end_segment(AVCodecContext *avctx, uint8_t *buf, + int buf_size, AVSubtitle *sub) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + + DVBSubRegion *region; + DVBSubRegionDisplay *display; + AVSubtitleRect *rect; + DVBSubCLUT *clut; + uint32_t *clut_table; + int i; + + sub->rects = NULL; + sub->start_display_time = 0; + sub->end_display_time = ctx->time_out * 1000; + sub->format = 0; + + sub->num_rects = ctx->display_list_size; + + if (sub->num_rects > 0) + sub->rects = av_mallocz(sizeof(AVSubtitleRect) * sub->num_rects); + + i = 0; + + for (display = ctx->display_list; display != NULL; display = display->next) { + region = get_region(ctx, display->region_id); + rect = &sub->rects[i]; + + if (region == NULL) /* the display entry refers to a region that is not known: skip it without advancing i */ + continue; + + rect->x = display->x_pos; + rect->y = display->y_pos; + rect->w = region->width; + rect->h = region->height; + rect->nb_colors = 16; + rect->linesize = region->width; + + clut = get_clut(ctx, region->clut); + + if (clut == NULL) + clut = &default_clut; + + switch (region->depth) { + case 2: + clut_table = clut->clut4; + break; + case 8: + clut_table = clut->clut256; + break; + case 4: + default: + clut_table = clut->clut16; + break; + } + + rect->rgba_palette = av_malloc((1 << region->depth) * sizeof(uint32_t)); + memcpy(rect->rgba_palette, clut_table, (1 << region->depth) * sizeof(uint32_t)); + + rect->bitmap = av_malloc(region->buf_size); + memcpy(rect->bitmap, region->pbuf, region->buf_size); + + i++; + } + + sub->num_rects = i; /* can be smaller than display_list_size when entries were skipped above */ + +#ifdef DEBUG_SAVE_IMAGES + save_display_set(ctx); +#endif + + return 1; +} +/* Decoder entry point. Walks the segments in buf, each introduced by the byte 0x0f followed by a type byte, a 16-bit page id and a 16-bit length; segments whose page id equals ctx->composition_id or ctx->ancillary_id are dispatched to the matching segment parser, and the result of dvbsub_display_end_segment() is stored in *data_size. Returns buf_size, or -1 when buf_size is 2 or less or when the walk does not end exactly at the end of the buffer. */ +static int dvbsub_decode(AVCodecContext *avctx, + void *data, int 
*data_size, + uint8_t *buf, int buf_size) +{ + DVBSubContext *ctx = (DVBSubContext*) avctx->priv_data; + AVSubtitle *sub = (AVSubtitle*) data; + uint8_t *p, *p_end; + int segment_type; + int page_id; + int segment_length; + +#ifdef DEBUG_PACKET_CONTENTS + int i; + + av_log(avctx, AV_LOG_INFO, "DVB sub packet:\n"); + + for (i=0; i < buf_size; i++) + { + av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + if (i % 16 == 15) + av_log(avctx, AV_LOG_INFO, "\n"); + } + + if (i % 16 != 0) + av_log(avctx, AV_LOG_INFO, "\n"); + +#endif + + if (buf_size <= 2) + return -1; + + p = buf; + p_end = buf + buf_size; + + while (p < p_end && *p == 0x0f) /* every segment starts with the byte 0x0f */ + { + p += 1; + segment_type = *p++; + page_id = BE_16(p); + p += 2; + segment_length = BE_16(p); + p += 2; /* p now points at the segment payload. NOTE(review): neither the 6 header bytes nor segment_length are checked against p_end before use - confirm the parser only hands over whole segments */ + + if (page_id == ctx->composition_id || page_id == ctx->ancillary_id) { /* segments for other pages are skipped */ + switch (segment_type) { + case DVBSUB_PAGE_SEGMENT: + dvbsub_parse_page_segment(avctx, p, segment_length); + break; + case DVBSUB_REGION_SEGMENT: + dvbsub_parse_region_segment(avctx, p, segment_length); + break; + case DVBSUB_CLUT_SEGMENT: + dvbsub_parse_clut_segment(avctx, p, segment_length); + break; + case DVBSUB_OBJECT_SEGMENT: + dvbsub_parse_object_segment(avctx, p, segment_length); + break; + case DVBSUB_DISPLAY_SEGMENT: + *data_size = dvbsub_display_end_segment(avctx, p, segment_length, sub); + break; + default: +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Subtitling segment type 0x%x, page id %d, length %d\n", + segment_type, page_id, segment_length); +#endif + break; + } + } + + p += segment_length; /* advance to the next segment whether or not this one was handled */ + } + + if (p != p_end) /* the loop stopped on a byte other than 0x0f, or p moved past the end */ + { +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Junk at end of packet\n"); +#endif + return -1; + } + + return buf_size; +} + +/* Decoder registration (positional initialiser): name, codec type, codec id, private context size, init callback, a NULL slot, close callback, decode callback. */ +AVCodec dvbsub_decoder = { + "dvbsub", + CODEC_TYPE_SUBTITLE, + CODEC_ID_DVB_SUBTITLE, + sizeof(DVBSubContext), + dvbsub_init_decoder, + NULL, + dvbsub_close_decoder, + dvbsub_decode, +}; + +/* Parser (mostly) copied from dvdsub.c */ + +#define PARSE_BUF_SIZE (65536) + + +/* parser definition: private reassembly state of the parser */ +typedef 
struct DVBSubParseContext { + uint8_t *packet_buf; /* reassembly buffer of PARSE_BUF_SIZE bytes, allocated in dvbsub_parse_init() */ + int packet_start; /* number of bytes at the front of packet_buf already returned through *poutbuf */ + int packet_index; /* number of bytes currently stored in packet_buf */ + int in_packet; /* non-zero while input is being accumulated; cleared at a 0xff byte or on junk */ +} DVBSubParseContext; +/* Parser init: allocates the PARSE_BUF_SIZE reassembly buffer. Always returns 0. NOTE(review): the av_malloc() result is not checked. */ +static int dvbsub_parse_init(AVCodecParserContext *s) +{ + DVBSubParseContext *pc = s->priv_data; + pc->packet_buf = av_malloc(PARSE_BUF_SIZE); + + return 0; +} +/* Reassemble segments from successive input chunks. When s->pts differs from s->last_pts (and last_pts is set) the chunk starts a new packet: buffered data is discarded and the chunk must begin with the bytes 0x20 0x00, which are skipped. Otherwise the bytes returned by the previous call are dropped from the front of the buffer. The input is then appended and scanned; on return *poutbuf and *poutbuf_size describe the run of complete segments at the start of the buffer (NULL and 0 if there is none). Returns buf_size, or -1 on a bad packet header or when the data would not fit in PARSE_BUF_SIZE. */ +static int dvbsub_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + DVBSubParseContext *pc = s->priv_data; + uint8_t *p, *p_end; + int len, buf_pos = 0; + +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "DVB parse packet pts=%Lx, lpts=%Lx, cpts=%Lx:\n", + s->pts, s->last_pts, s->cur_frame_pts[s->cur_frame_start_index]); +#endif + +#ifdef DEBUG_PACKET_CONTENTS + int i; + + for (i=0; i < buf_size; i++) + { + av_log(avctx, AV_LOG_INFO, "%02x ", buf[i]); + if (i % 16 == 15) + av_log(avctx, AV_LOG_INFO, "\n"); + } + + if (i % 16 != 0) + av_log(avctx, AV_LOG_INFO, "\n"); + +#endif + + *poutbuf = NULL; + *poutbuf_size = 0; + + s->fetch_timestamp = 1; + + if (s->last_pts != s->pts && s->last_pts != AV_NOPTS_VALUE) /* Start of a new packet */ + { + if (pc->packet_index != pc->packet_start) + { +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Discarding %d bytes\n", + pc->packet_index - pc->packet_start); +#endif + } + + pc->packet_start = 0; + pc->packet_index = 0; + + if (buf_size < 2 || buf[0] != 0x20 || buf[1] != 0x00) { +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Bad packet header\n"); +#endif + return -1; + } + + buf_pos = 2; /* skip the two header bytes checked above */ + + pc->in_packet = 1; + } else { + if (pc->packet_start != 0) /* drop the bytes handed out by the previous call */ + { + if (pc->packet_index != pc->packet_start) + { + memmove(pc->packet_buf, pc->packet_buf + pc->packet_start, + pc->packet_index - pc->packet_start); + + pc->packet_index -= pc->packet_start; + pc->packet_start = 0; + } else { + pc->packet_start = 0; + pc->packet_index = 0; + } + } + } + + if (buf_size - buf_pos + pc->packet_index > PARSE_BUF_SIZE) /* the appended data would not fit in packet_buf */ + return -1; + +/* if not currently in a packet, discard data */ + 
if (pc->in_packet == 0) + return buf_size; + + memcpy(pc->packet_buf + pc->packet_index, buf + buf_pos, buf_size - buf_pos); + pc->packet_index += buf_size - buf_pos; + + p = pc->packet_buf; + p_end = pc->packet_buf + pc->packet_index; + + while (p < p_end) /* count the complete segments at the start of the buffer */ + { + if (*p == 0x0f) /* segment start byte */ + { + if (p + 6 <= p_end) + { + len = BE_16(p + 4); /* 16-bit length field at offset 4 of the 6-byte segment header */ + + if (p + len + 6 <= p_end) + { + *poutbuf_size += len + 6; + + p += len + 6; + } else + break; + } else + break; + } else if (*p == 0xff) { /* 0xff ends the packet: truncate the buffer here and stop accumulating */ + if (p + 1 < p_end) + { +#ifdef DEBUG + av_log(avctx, AV_LOG_INFO, "Junk at end of packet\n"); +#endif + } + pc->packet_index = p - pc->packet_buf; + pc->in_packet = 0; + break; + } else { + av_log(avctx, AV_LOG_ERROR, "Junk in packet\n"); + + pc->packet_index = p - pc->packet_buf; + pc->in_packet = 0; + break; + } + } + + if (*poutbuf_size > 0) + { + *poutbuf = pc->packet_buf; + pc->packet_start = *poutbuf_size; /* remembered so that the next call can discard these bytes */ + } + + if (s->last_pts == AV_NOPTS_VALUE) + s->last_pts = s->pts; + + return buf_size; +} +/* Parser close: releases the reassembly buffer with av_freep(). */ +static void dvbsub_parse_close(AVCodecParserContext *s) +{ + DVBSubParseContext *pc = s->priv_data; + av_freep(&pc->packet_buf); +} +/* Parser registration (positional initialiser): codec id list, private context size, init, parse and close callbacks. */ +AVCodecParser dvbsub_parser = { + { CODEC_ID_DVB_SUBTITLE }, + sizeof(DVBSubParseContext), + dvbsub_parse_init, + dvbsub_parse, + dvbsub_parse_close, +}; diff --git a/mpeg4/src/libavcodec/dvdata.h b/mpeg4/src/libavcodec/dvdata.h new file mode 100644 index 0000000000000000000000000000000000000000..a3d42d66c17f7f5272f69099bd84f6cd5074a526 --- /dev/null +++ b/mpeg4/src/libavcodec/dvdata.h @@ -0,0 +1,2665 @@ +/* + * Constants for DV codec + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file dvdata.h + * Constants for DV codec. + */ + +/* + * DVprofile is used to express the differences between various + * DV flavors. For now it's primarily used for differentiating + * 525/60 and 625/50, but the plans are to use it for various + * DV specs as well (e.g. SMPTE314M vs. IEC 61834). + */ +typedef struct DVprofile { + int dsf; /* value of the dsf in the DV header */ + int frame_size; /* total size of one frame in bytes */ + int difseg_size; /* number of DIF segments per DIF channel */ + int n_difchan; /* number of DIF channels per frame */ + int frame_rate; /* presumably the frame rate numerator, with frame_rate_base as denominator - TODO confirm */ + int frame_rate_base; + int ltc_divisor; /* FPS from the LTS standpoint */ + int height; /* picture height in pixels */ + int width; /* picture width in pixels */ + AVRational sar[2]; /* sample aspect ratios for 4:3 and 16:9 */ + const uint16_t *video_place; /* positions of all DV macro blocks */ + enum PixelFormat pix_fmt; /* picture pixel format */ + + int audio_stride; /* size of audio_shuffle table */ + int audio_min_samples[3];/* min amount of audio samples */ + /* for 48kHz, 44.1kHz and 32kHz */ + int audio_samples_dist[5];/* how many samples are supposed to be */ + /* in each frame in a 5 frames window */ + const uint16_t (*audio_shuffle)[9]; /* PCM shuffling table */ +} DVprofile; + +#define NB_DV_VLC 409 + +/* + * There's a catch about the following three tables: the mapping they establish + * between (run, level) and vlc is not 1-1. So you have to watch out for that + * when building misc. 
tables. E.g. (1, 0) can be either 0x7cf or 0x1f82. + */ +static const uint16_t dv_vlc_bits[409] = { + 0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016, + 0x0017, 0x0030, 0x0031, 0x0032, 0x0033, 0x0068, 0x0069, 0x006a, + 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x00e0, 0x00e1, 0x00e2, + 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, + 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x01e0, 0x01e1, 0x01e2, + 0x01e3, 0x01e4, 0x01e5, 0x01e6, 0x01e7, 0x01e8, 0x01e9, 0x01ea, + 0x01eb, 0x01ec, 0x01ed, 0x01ee, 0x01ef, 0x03e0, 0x03e1, 0x03e2, + 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x07ce, 0x07cf, 0x07d0, 0x07d1, + 0x07d2, 0x07d3, 0x07d4, 0x07d5, 0x0fac, 0x0fad, 0x0fae, 0x0faf, + 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7, + 0x0fb8, 0x0fb9, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf, + 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, + 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, + 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, + 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, + 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, + 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, + 0x1fb0, 0x1fb1, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb5, 0x1fb6, 0x1fb7, + 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc, 0x1fbd, 0x1fbe, 0x1fbf, + 0x7f00, 0x7f01, 0x7f02, 0x7f03, 0x7f04, 0x7f05, 0x7f06, 0x7f07, + 0x7f08, 0x7f09, 0x7f0a, 0x7f0b, 0x7f0c, 0x7f0d, 0x7f0e, 0x7f0f, + 0x7f10, 0x7f11, 0x7f12, 0x7f13, 0x7f14, 0x7f15, 0x7f16, 0x7f17, + 0x7f18, 0x7f19, 0x7f1a, 0x7f1b, 0x7f1c, 0x7f1d, 0x7f1e, 0x7f1f, + 0x7f20, 0x7f21, 0x7f22, 0x7f23, 0x7f24, 0x7f25, 0x7f26, 0x7f27, + 0x7f28, 0x7f29, 0x7f2a, 0x7f2b, 0x7f2c, 0x7f2d, 0x7f2e, 0x7f2f, + 0x7f30, 0x7f31, 0x7f32, 0x7f33, 0x7f34, 0x7f35, 0x7f36, 0x7f37, + 0x7f38, 0x7f39, 0x7f3a, 0x7f3b, 0x7f3c, 0x7f3d, 0x7f3e, 0x7f3f, + 0x7f40, 0x7f41, 0x7f42, 0x7f43, 0x7f44, 0x7f45, 0x7f46, 0x7f47, + 0x7f48, 0x7f49, 0x7f4a, 0x7f4b, 0x7f4c, 0x7f4d, 
0x7f4e, 0x7f4f, + 0x7f50, 0x7f51, 0x7f52, 0x7f53, 0x7f54, 0x7f55, 0x7f56, 0x7f57, + 0x7f58, 0x7f59, 0x7f5a, 0x7f5b, 0x7f5c, 0x7f5d, 0x7f5e, 0x7f5f, + 0x7f60, 0x7f61, 0x7f62, 0x7f63, 0x7f64, 0x7f65, 0x7f66, 0x7f67, + 0x7f68, 0x7f69, 0x7f6a, 0x7f6b, 0x7f6c, 0x7f6d, 0x7f6e, 0x7f6f, + 0x7f70, 0x7f71, 0x7f72, 0x7f73, 0x7f74, 0x7f75, 0x7f76, 0x7f77, + 0x7f78, 0x7f79, 0x7f7a, 0x7f7b, 0x7f7c, 0x7f7d, 0x7f7e, 0x7f7f, + 0x7f80, 0x7f81, 0x7f82, 0x7f83, 0x7f84, 0x7f85, 0x7f86, 0x7f87, + 0x7f88, 0x7f89, 0x7f8a, 0x7f8b, 0x7f8c, 0x7f8d, 0x7f8e, 0x7f8f, + 0x7f90, 0x7f91, 0x7f92, 0x7f93, 0x7f94, 0x7f95, 0x7f96, 0x7f97, + 0x7f98, 0x7f99, 0x7f9a, 0x7f9b, 0x7f9c, 0x7f9d, 0x7f9e, 0x7f9f, + 0x7fa0, 0x7fa1, 0x7fa2, 0x7fa3, 0x7fa4, 0x7fa5, 0x7fa6, 0x7fa7, + 0x7fa8, 0x7fa9, 0x7faa, 0x7fab, 0x7fac, 0x7fad, 0x7fae, 0x7faf, + 0x7fb0, 0x7fb1, 0x7fb2, 0x7fb3, 0x7fb4, 0x7fb5, 0x7fb6, 0x7fb7, + 0x7fb8, 0x7fb9, 0x7fba, 0x7fbb, 0x7fbc, 0x7fbd, 0x7fbe, 0x7fbf, + 0x7fc0, 0x7fc1, 0x7fc2, 0x7fc3, 0x7fc4, 0x7fc5, 0x7fc6, 0x7fc7, + 0x7fc8, 0x7fc9, 0x7fca, 0x7fcb, 0x7fcc, 0x7fcd, 0x7fce, 0x7fcf, + 0x7fd0, 0x7fd1, 0x7fd2, 0x7fd3, 0x7fd4, 0x7fd5, 0x7fd6, 0x7fd7, + 0x7fd8, 0x7fd9, 0x7fda, 0x7fdb, 0x7fdc, 0x7fdd, 0x7fde, 0x7fdf, + 0x7fe0, 0x7fe1, 0x7fe2, 0x7fe3, 0x7fe4, 0x7fe5, 0x7fe6, 0x7fe7, + 0x7fe8, 0x7fe9, 0x7fea, 0x7feb, 0x7fec, 0x7fed, 0x7fee, 0x7fef, + 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff5, 0x7ff6, 0x7ff7, + 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff, + 0x0006, +}; + +static const uint8_t dv_vlc_len[409] = { + 2, 3, 4, 4, 4, 5, 5, 5, + 5, 6, 6, 6, 6, 7, 7, 7, + 7, 7, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 
13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 4, +}; + +static const uint8_t dv_vlc_run[409] = { + 0, 0, 1, 0, 0, 2, 1, 0, + 0, 3, 4, 0, 0, 5, 6, 2, + 1, 1, 0, 0, 0, 7, 8, 9, + 10, 3, 4, 2, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 11, 12, 13, + 14, 5, 6, 3, 4, 2, 2, 1, + 0, 0, 0, 0, 0, 5, 3, 3, + 2, 1, 1, 1, 0, 1, 6, 4, + 3, 1, 1, 1, 2, 3, 4, 5, + 7, 8, 9, 10, 7, 8, 4, 3, + 2, 2, 2, 2, 2, 1, 1, 1, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +127, +}; + +static const uint8_t dv_vlc_level[409] = { + 1, 2, 1, 3, 4, 1, 2, 5, + 6, 1, 1, 7, 8, 1, 1, 2, + 3, 4, 9, 10, 11, 1, 1, 1, + 1, 2, 2, 3, 5, 6, 7, 12, + 13, 14, 15, 16, 17, 1, 1, 1, + 1, 2, 2, 3, 3, 4, 5, 8, + 18, 19, 20, 21, 22, 3, 4, 5, + 6, 9, 10, 11, 0, 0, 3, 4, + 6, 12, 13, 14, 0, 0, 0, 0, + 2, 2, 2, 2, 3, 3, 5, 7, + 7, 8, 9, 10, 11, 15, 16, 17, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 
168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + 0, +}; + +/* unquant tables (not used directly); dv_88_areas holds 64 entries (8 per row) with values 0..3 */ +static const uint8_t dv_88_areas[64] = { + 0,0,0,1,1,1,2,2, + 0,0,1,1,1,2,2,2, + 0,1,1,1,2,2,2,3, + 1,1,1,2,2,2,3,3, + 1,1,2,2,2,3,3,3, + 1,2,2,2,3,3,3,3, + 2,2,2,3,3,3,3,3, + 2,2,3,3,3,3,3,3, +}; +/* same layout as dv_88_areas: 64 entries with values 0..3 - presumably the variant for the 2-4-8 DCT mode, TODO confirm */ +static const uint8_t dv_248_areas[64] = { + 0,0,1,1,1,2,2,3, + 0,0,1,1,2,2,2,3, + 0,1,1,2,2,2,3,3, + 0,1,1,2,2,2,3,3, + 1,1,2,2,2,3,3,3, + 1,1,2,2,2,3,3,3, + 1,2,2,2,3,3,3,3, + 1,2,2,3,3,3,3,3, +}; +/* 22 rows of 4 shift amounts - presumably one column per area value 0..3 of the tables above; confirm against the code that uses this table */ +static const uint8_t dv_quant_shifts[22][4] = { + { 3,3,4,4 }, + { 3,3,4,4 }, + { 2,3,3,4 }, + { 2,3,3,4 }, + { 2,2,3,3 }, + { 2,2,3,3 }, + { 1,2,2,3 }, + { 1,2,2,3 }, + { 1,1,2,2 }, + { 1,1,2,2 }, + { 0,1,1,2 }, + { 0,1,1,2 }, + { 0,0,1,1 }, + { 0,0,1,1 }, + { 0,0,0,1 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, + { 0,0,0,0 }, +}; + +static const uint8_t dv_quant_offset[4] = { 6, 3, 0, 1 }; + +/* NOTE: I prefer hardcoding the positioning of dv blocks, it is + simpler :-) */ + +static const uint16_t dv_place_420[1620] = { + 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848, + 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48, + 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48, + 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a, + 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a, + 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a, + 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c, + 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c, + 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c, + 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e, + 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e, + 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e, + 
0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850, + 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50, + 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50, + 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52, + 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52, + 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852, + 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854, + 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54, + 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54, + 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56, + 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56, + 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856, + 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858, + 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58, + 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58, + 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48, + 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048, + 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248, + 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a, + 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a, + 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a, + 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c, + 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c, + 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c, + 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e, + 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e, + 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e, + 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50, + 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050, + 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250, + 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252, + 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052, + 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52, + 0x1230, 0x2a1e, 0x3642, 0x060c, 0x1e54, + 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054, + 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254, + 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256, + 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056, + 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56, + 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58, + 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058, + 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258, + 0x1824, 0x3012, 0x3c36, 0x0c00, 0x2448, + 0x1a24, 0x3212, 0x3e36, 0x0e00, 0x2648, + 0x1c24, 0x3412, 0x4036, 0x1000, 0x2848, + 0x1c26, 0x3414, 0x4038, 0x1002, 0x284a, + 0x1a26, 0x3214, 0x3e38, 0x0e02, 0x264a, + 0x1826, 0x3014, 0x3c38, 
0x0c02, 0x244a, + 0x1828, 0x3016, 0x3c3a, 0x0c04, 0x244c, + 0x1a28, 0x3216, 0x3e3a, 0x0e04, 0x264c, + 0x1c28, 0x3416, 0x403a, 0x1004, 0x284c, + 0x1c2a, 0x3418, 0x403c, 0x1006, 0x284e, + 0x1a2a, 0x3218, 0x3e3c, 0x0e06, 0x264e, + 0x182a, 0x3018, 0x3c3c, 0x0c06, 0x244e, + 0x182c, 0x301a, 0x3c3e, 0x0c08, 0x2450, + 0x1a2c, 0x321a, 0x3e3e, 0x0e08, 0x2650, + 0x1c2c, 0x341a, 0x403e, 0x1008, 0x2850, + 0x1c2e, 0x341c, 0x4040, 0x100a, 0x2852, + 0x1a2e, 0x321c, 0x3e40, 0x0e0a, 0x2652, + 0x182e, 0x301c, 0x3c40, 0x0c0a, 0x2452, + 0x1830, 0x301e, 0x3c42, 0x0c0c, 0x2454, + 0x1a30, 0x321e, 0x3e42, 0x0e0c, 0x2654, + 0x1c30, 0x341e, 0x4042, 0x100c, 0x2854, + 0x1c32, 0x3420, 0x4044, 0x100e, 0x2856, + 0x1a32, 0x3220, 0x3e44, 0x0e0e, 0x2656, + 0x1832, 0x3020, 0x3c44, 0x0c0e, 0x2456, + 0x1834, 0x3022, 0x3c46, 0x0c10, 0x2458, + 0x1a34, 0x3222, 0x3e46, 0x0e10, 0x2658, + 0x1c34, 0x3422, 0x4046, 0x1010, 0x2858, + 0x1e24, 0x3612, 0x4236, 0x1200, 0x2a48, + 0x2024, 0x3812, 0x4436, 0x1400, 0x2c48, + 0x2224, 0x3a12, 0x4636, 0x1600, 0x2e48, + 0x2226, 0x3a14, 0x4638, 0x1602, 0x2e4a, + 0x2026, 0x3814, 0x4438, 0x1402, 0x2c4a, + 0x1e26, 0x3614, 0x4238, 0x1202, 0x2a4a, + 0x1e28, 0x3616, 0x423a, 0x1204, 0x2a4c, + 0x2028, 0x3816, 0x443a, 0x1404, 0x2c4c, + 0x2228, 0x3a16, 0x463a, 0x1604, 0x2e4c, + 0x222a, 0x3a18, 0x463c, 0x1606, 0x2e4e, + 0x202a, 0x3818, 0x443c, 0x1406, 0x2c4e, + 0x1e2a, 0x3618, 0x423c, 0x1206, 0x2a4e, + 0x1e2c, 0x361a, 0x423e, 0x1208, 0x2a50, + 0x202c, 0x381a, 0x443e, 0x1408, 0x2c50, + 0x222c, 0x3a1a, 0x463e, 0x1608, 0x2e50, + 0x222e, 0x3a1c, 0x4640, 0x160a, 0x2e52, + 0x202e, 0x381c, 0x4440, 0x140a, 0x2c52, + 0x1e2e, 0x361c, 0x4240, 0x120a, 0x2a52, + 0x1e30, 0x361e, 0x4242, 0x120c, 0x2a54, + 0x2030, 0x381e, 0x4442, 0x140c, 0x2c54, + 0x2230, 0x3a1e, 0x4642, 0x160c, 0x2e54, + 0x2232, 0x3a20, 0x4644, 0x160e, 0x2e56, + 0x2032, 0x3820, 0x4444, 0x140e, 0x2c56, + 0x1e32, 0x3620, 0x4244, 0x120e, 0x2a56, + 0x1e34, 0x3622, 0x4246, 0x1210, 0x2a58, + 0x2034, 0x3822, 0x4446, 0x1410, 0x2c58, + 0x2234, 
0x3a22, 0x4646, 0x1610, 0x2e58, + 0x2424, 0x3c12, 0x0036, 0x1800, 0x3048, + 0x2624, 0x3e12, 0x0236, 0x1a00, 0x3248, + 0x2824, 0x4012, 0x0436, 0x1c00, 0x3448, + 0x2826, 0x4014, 0x0438, 0x1c02, 0x344a, + 0x2626, 0x3e14, 0x0238, 0x1a02, 0x324a, + 0x2426, 0x3c14, 0x0038, 0x1802, 0x304a, + 0x2428, 0x3c16, 0x003a, 0x1804, 0x304c, + 0x2628, 0x3e16, 0x023a, 0x1a04, 0x324c, + 0x2828, 0x4016, 0x043a, 0x1c04, 0x344c, + 0x282a, 0x4018, 0x043c, 0x1c06, 0x344e, + 0x262a, 0x3e18, 0x023c, 0x1a06, 0x324e, + 0x242a, 0x3c18, 0x003c, 0x1806, 0x304e, + 0x242c, 0x3c1a, 0x003e, 0x1808, 0x3050, + 0x262c, 0x3e1a, 0x023e, 0x1a08, 0x3250, + 0x282c, 0x401a, 0x043e, 0x1c08, 0x3450, + 0x282e, 0x401c, 0x0440, 0x1c0a, 0x3452, + 0x262e, 0x3e1c, 0x0240, 0x1a0a, 0x3252, + 0x242e, 0x3c1c, 0x0040, 0x180a, 0x3052, + 0x2430, 0x3c1e, 0x0042, 0x180c, 0x3054, + 0x2630, 0x3e1e, 0x0242, 0x1a0c, 0x3254, + 0x2830, 0x401e, 0x0442, 0x1c0c, 0x3454, + 0x2832, 0x4020, 0x0444, 0x1c0e, 0x3456, + 0x2632, 0x3e20, 0x0244, 0x1a0e, 0x3256, + 0x2432, 0x3c20, 0x0044, 0x180e, 0x3056, + 0x2434, 0x3c22, 0x0046, 0x1810, 0x3058, + 0x2634, 0x3e22, 0x0246, 0x1a10, 0x3258, + 0x2834, 0x4022, 0x0446, 0x1c10, 0x3458, + 0x2a24, 0x4212, 0x0636, 0x1e00, 0x3648, + 0x2c24, 0x4412, 0x0836, 0x2000, 0x3848, + 0x2e24, 0x4612, 0x0a36, 0x2200, 0x3a48, + 0x2e26, 0x4614, 0x0a38, 0x2202, 0x3a4a, + 0x2c26, 0x4414, 0x0838, 0x2002, 0x384a, + 0x2a26, 0x4214, 0x0638, 0x1e02, 0x364a, + 0x2a28, 0x4216, 0x063a, 0x1e04, 0x364c, + 0x2c28, 0x4416, 0x083a, 0x2004, 0x384c, + 0x2e28, 0x4616, 0x0a3a, 0x2204, 0x3a4c, + 0x2e2a, 0x4618, 0x0a3c, 0x2206, 0x3a4e, + 0x2c2a, 0x4418, 0x083c, 0x2006, 0x384e, + 0x2a2a, 0x4218, 0x063c, 0x1e06, 0x364e, + 0x2a2c, 0x421a, 0x063e, 0x1e08, 0x3650, + 0x2c2c, 0x441a, 0x083e, 0x2008, 0x3850, + 0x2e2c, 0x461a, 0x0a3e, 0x2208, 0x3a50, + 0x2e2e, 0x461c, 0x0a40, 0x220a, 0x3a52, + 0x2c2e, 0x441c, 0x0840, 0x200a, 0x3852, + 0x2a2e, 0x421c, 0x0640, 0x1e0a, 0x3652, + 0x2a30, 0x421e, 0x0642, 0x1e0c, 0x3654, + 0x2c30, 0x441e, 0x0842, 0x200c, 
0x3854, + 0x2e30, 0x461e, 0x0a42, 0x220c, 0x3a54, + 0x2e32, 0x4620, 0x0a44, 0x220e, 0x3a56, + 0x2c32, 0x4420, 0x0844, 0x200e, 0x3856, + 0x2a32, 0x4220, 0x0644, 0x1e0e, 0x3656, + 0x2a34, 0x4222, 0x0646, 0x1e10, 0x3658, + 0x2c34, 0x4422, 0x0846, 0x2010, 0x3858, + 0x2e34, 0x4622, 0x0a46, 0x2210, 0x3a58, + 0x3024, 0x0012, 0x0c36, 0x2400, 0x3c48, + 0x3224, 0x0212, 0x0e36, 0x2600, 0x3e48, + 0x3424, 0x0412, 0x1036, 0x2800, 0x4048, + 0x3426, 0x0414, 0x1038, 0x2802, 0x404a, + 0x3226, 0x0214, 0x0e38, 0x2602, 0x3e4a, + 0x3026, 0x0014, 0x0c38, 0x2402, 0x3c4a, + 0x3028, 0x0016, 0x0c3a, 0x2404, 0x3c4c, + 0x3228, 0x0216, 0x0e3a, 0x2604, 0x3e4c, + 0x3428, 0x0416, 0x103a, 0x2804, 0x404c, + 0x342a, 0x0418, 0x103c, 0x2806, 0x404e, + 0x322a, 0x0218, 0x0e3c, 0x2606, 0x3e4e, + 0x302a, 0x0018, 0x0c3c, 0x2406, 0x3c4e, + 0x302c, 0x001a, 0x0c3e, 0x2408, 0x3c50, + 0x322c, 0x021a, 0x0e3e, 0x2608, 0x3e50, + 0x342c, 0x041a, 0x103e, 0x2808, 0x4050, + 0x342e, 0x041c, 0x1040, 0x280a, 0x4052, + 0x322e, 0x021c, 0x0e40, 0x260a, 0x3e52, + 0x302e, 0x001c, 0x0c40, 0x240a, 0x3c52, + 0x3030, 0x001e, 0x0c42, 0x240c, 0x3c54, + 0x3230, 0x021e, 0x0e42, 0x260c, 0x3e54, + 0x3430, 0x041e, 0x1042, 0x280c, 0x4054, + 0x3432, 0x0420, 0x1044, 0x280e, 0x4056, + 0x3232, 0x0220, 0x0e44, 0x260e, 0x3e56, + 0x3032, 0x0020, 0x0c44, 0x240e, 0x3c56, + 0x3034, 0x0022, 0x0c46, 0x2410, 0x3c58, + 0x3234, 0x0222, 0x0e46, 0x2610, 0x3e58, + 0x3434, 0x0422, 0x1046, 0x2810, 0x4058, + 0x3624, 0x0612, 0x1236, 0x2a00, 0x4248, + 0x3824, 0x0812, 0x1436, 0x2c00, 0x4448, + 0x3a24, 0x0a12, 0x1636, 0x2e00, 0x4648, + 0x3a26, 0x0a14, 0x1638, 0x2e02, 0x464a, + 0x3826, 0x0814, 0x1438, 0x2c02, 0x444a, + 0x3626, 0x0614, 0x1238, 0x2a02, 0x424a, + 0x3628, 0x0616, 0x123a, 0x2a04, 0x424c, + 0x3828, 0x0816, 0x143a, 0x2c04, 0x444c, + 0x3a28, 0x0a16, 0x163a, 0x2e04, 0x464c, + 0x3a2a, 0x0a18, 0x163c, 0x2e06, 0x464e, + 0x382a, 0x0818, 0x143c, 0x2c06, 0x444e, + 0x362a, 0x0618, 0x123c, 0x2a06, 0x424e, + 0x362c, 0x061a, 0x123e, 0x2a08, 0x4250, + 0x382c, 0x081a, 
0x143e, 0x2c08, 0x4450, + 0x3a2c, 0x0a1a, 0x163e, 0x2e08, 0x4650, + 0x3a2e, 0x0a1c, 0x1640, 0x2e0a, 0x4652, + 0x382e, 0x081c, 0x1440, 0x2c0a, 0x4452, + 0x362e, 0x061c, 0x1240, 0x2a0a, 0x4252, + 0x3630, 0x061e, 0x1242, 0x2a0c, 0x4254, + 0x3830, 0x081e, 0x1442, 0x2c0c, 0x4454, + 0x3a30, 0x0a1e, 0x1642, 0x2e0c, 0x4654, + 0x3a32, 0x0a20, 0x1644, 0x2e0e, 0x4656, + 0x3832, 0x0820, 0x1444, 0x2c0e, 0x4456, + 0x3632, 0x0620, 0x1244, 0x2a0e, 0x4256, + 0x3634, 0x0622, 0x1246, 0x2a10, 0x4258, + 0x3834, 0x0822, 0x1446, 0x2c10, 0x4458, + 0x3a34, 0x0a22, 0x1646, 0x2e10, 0x4658, + 0x3c24, 0x0c12, 0x1836, 0x3000, 0x0048, + 0x3e24, 0x0e12, 0x1a36, 0x3200, 0x0248, + 0x4024, 0x1012, 0x1c36, 0x3400, 0x0448, + 0x4026, 0x1014, 0x1c38, 0x3402, 0x044a, + 0x3e26, 0x0e14, 0x1a38, 0x3202, 0x024a, + 0x3c26, 0x0c14, 0x1838, 0x3002, 0x004a, + 0x3c28, 0x0c16, 0x183a, 0x3004, 0x004c, + 0x3e28, 0x0e16, 0x1a3a, 0x3204, 0x024c, + 0x4028, 0x1016, 0x1c3a, 0x3404, 0x044c, + 0x402a, 0x1018, 0x1c3c, 0x3406, 0x044e, + 0x3e2a, 0x0e18, 0x1a3c, 0x3206, 0x024e, + 0x3c2a, 0x0c18, 0x183c, 0x3006, 0x004e, + 0x3c2c, 0x0c1a, 0x183e, 0x3008, 0x0050, + 0x3e2c, 0x0e1a, 0x1a3e, 0x3208, 0x0250, + 0x402c, 0x101a, 0x1c3e, 0x3408, 0x0450, + 0x402e, 0x101c, 0x1c40, 0x340a, 0x0452, + 0x3e2e, 0x0e1c, 0x1a40, 0x320a, 0x0252, + 0x3c2e, 0x0c1c, 0x1840, 0x300a, 0x0052, + 0x3c30, 0x0c1e, 0x1842, 0x300c, 0x0054, + 0x3e30, 0x0e1e, 0x1a42, 0x320c, 0x0254, + 0x4030, 0x101e, 0x1c42, 0x340c, 0x0454, + 0x4032, 0x1020, 0x1c44, 0x340e, 0x0456, + 0x3e32, 0x0e20, 0x1a44, 0x320e, 0x0256, + 0x3c32, 0x0c20, 0x1844, 0x300e, 0x0056, + 0x3c34, 0x0c22, 0x1846, 0x3010, 0x0058, + 0x3e34, 0x0e22, 0x1a46, 0x3210, 0x0258, + 0x4034, 0x1022, 0x1c46, 0x3410, 0x0458, + 0x4224, 0x1212, 0x1e36, 0x3600, 0x0648, + 0x4424, 0x1412, 0x2036, 0x3800, 0x0848, + 0x4624, 0x1612, 0x2236, 0x3a00, 0x0a48, + 0x4626, 0x1614, 0x2238, 0x3a02, 0x0a4a, + 0x4426, 0x1414, 0x2038, 0x3802, 0x084a, + 0x4226, 0x1214, 0x1e38, 0x3602, 0x064a, + 0x4228, 0x1216, 0x1e3a, 0x3604, 0x064c, + 
0x4428, 0x1416, 0x203a, 0x3804, 0x084c, + 0x4628, 0x1616, 0x223a, 0x3a04, 0x0a4c, + 0x462a, 0x1618, 0x223c, 0x3a06, 0x0a4e, + 0x442a, 0x1418, 0x203c, 0x3806, 0x084e, + 0x422a, 0x1218, 0x1e3c, 0x3606, 0x064e, + 0x422c, 0x121a, 0x1e3e, 0x3608, 0x0650, + 0x442c, 0x141a, 0x203e, 0x3808, 0x0850, + 0x462c, 0x161a, 0x223e, 0x3a08, 0x0a50, + 0x462e, 0x161c, 0x2240, 0x3a0a, 0x0a52, + 0x442e, 0x141c, 0x2040, 0x380a, 0x0852, + 0x422e, 0x121c, 0x1e40, 0x360a, 0x0652, + 0x4230, 0x121e, 0x1e42, 0x360c, 0x0654, + 0x4430, 0x141e, 0x2042, 0x380c, 0x0854, + 0x4630, 0x161e, 0x2242, 0x3a0c, 0x0a54, + 0x4632, 0x1620, 0x2244, 0x3a0e, 0x0a56, + 0x4432, 0x1420, 0x2044, 0x380e, 0x0856, + 0x4232, 0x1220, 0x1e44, 0x360e, 0x0656, + 0x4234, 0x1222, 0x1e46, 0x3610, 0x0658, + 0x4434, 0x1422, 0x2046, 0x3810, 0x0858, + 0x4634, 0x1622, 0x2246, 0x3a10, 0x0a58, + 0x0024, 0x1812, 0x2436, 0x3c00, 0x0c48, + 0x0224, 0x1a12, 0x2636, 0x3e00, 0x0e48, + 0x0424, 0x1c12, 0x2836, 0x4000, 0x1048, + 0x0426, 0x1c14, 0x2838, 0x4002, 0x104a, + 0x0226, 0x1a14, 0x2638, 0x3e02, 0x0e4a, + 0x0026, 0x1814, 0x2438, 0x3c02, 0x0c4a, + 0x0028, 0x1816, 0x243a, 0x3c04, 0x0c4c, + 0x0228, 0x1a16, 0x263a, 0x3e04, 0x0e4c, + 0x0428, 0x1c16, 0x283a, 0x4004, 0x104c, + 0x042a, 0x1c18, 0x283c, 0x4006, 0x104e, + 0x022a, 0x1a18, 0x263c, 0x3e06, 0x0e4e, + 0x002a, 0x1818, 0x243c, 0x3c06, 0x0c4e, + 0x002c, 0x181a, 0x243e, 0x3c08, 0x0c50, + 0x022c, 0x1a1a, 0x263e, 0x3e08, 0x0e50, + 0x042c, 0x1c1a, 0x283e, 0x4008, 0x1050, + 0x042e, 0x1c1c, 0x2840, 0x400a, 0x1052, + 0x022e, 0x1a1c, 0x2640, 0x3e0a, 0x0e52, + 0x002e, 0x181c, 0x2440, 0x3c0a, 0x0c52, + 0x0030, 0x181e, 0x2442, 0x3c0c, 0x0c54, + 0x0230, 0x1a1e, 0x2642, 0x3e0c, 0x0e54, + 0x0430, 0x1c1e, 0x2842, 0x400c, 0x1054, + 0x0432, 0x1c20, 0x2844, 0x400e, 0x1056, + 0x0232, 0x1a20, 0x2644, 0x3e0e, 0x0e56, + 0x0032, 0x1820, 0x2444, 0x3c0e, 0x0c56, + 0x0034, 0x1822, 0x2446, 0x3c10, 0x0c58, + 0x0234, 0x1a22, 0x2646, 0x3e10, 0x0e58, + 0x0434, 0x1c22, 0x2846, 0x4010, 0x1058, + 0x0624, 0x1e12, 0x2a36, 
0x4200, 0x1248, + 0x0824, 0x2012, 0x2c36, 0x4400, 0x1448, + 0x0a24, 0x2212, 0x2e36, 0x4600, 0x1648, + 0x0a26, 0x2214, 0x2e38, 0x4602, 0x164a, + 0x0826, 0x2014, 0x2c38, 0x4402, 0x144a, + 0x0626, 0x1e14, 0x2a38, 0x4202, 0x124a, + 0x0628, 0x1e16, 0x2a3a, 0x4204, 0x124c, + 0x0828, 0x2016, 0x2c3a, 0x4404, 0x144c, + 0x0a28, 0x2216, 0x2e3a, 0x4604, 0x164c, + 0x0a2a, 0x2218, 0x2e3c, 0x4606, 0x164e, + 0x082a, 0x2018, 0x2c3c, 0x4406, 0x144e, + 0x062a, 0x1e18, 0x2a3c, 0x4206, 0x124e, + 0x062c, 0x1e1a, 0x2a3e, 0x4208, 0x1250, + 0x082c, 0x201a, 0x2c3e, 0x4408, 0x1450, + 0x0a2c, 0x221a, 0x2e3e, 0x4608, 0x1650, + 0x0a2e, 0x221c, 0x2e40, 0x460a, 0x1652, + 0x082e, 0x201c, 0x2c40, 0x440a, 0x1452, + 0x062e, 0x1e1c, 0x2a40, 0x420a, 0x1252, + 0x0630, 0x1e1e, 0x2a42, 0x420c, 0x1254, + 0x0830, 0x201e, 0x2c42, 0x440c, 0x1454, + 0x0a30, 0x221e, 0x2e42, 0x460c, 0x1654, + 0x0a32, 0x2220, 0x2e44, 0x460e, 0x1656, + 0x0832, 0x2020, 0x2c44, 0x440e, 0x1456, + 0x0632, 0x1e20, 0x2a44, 0x420e, 0x1256, + 0x0634, 0x1e22, 0x2a46, 0x4210, 0x1258, + 0x0834, 0x2022, 0x2c46, 0x4410, 0x1458, + 0x0a34, 0x2222, 0x2e46, 0x4610, 0x1658, +}; + +static const uint16_t dv_place_411P[1620] = { + 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848, + 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948, + 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48, + 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48, + 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48, + 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48, + 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c, + 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c, + 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c, + 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c, + 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c, + 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c, + 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850, + 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950, + 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50, + 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50, + 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50, + 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50, + 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54, + 0x1030, 0x251c, 0x3140, 0x040c, 
0x1c54, + 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54, + 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54, + 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954, + 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854, + 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858, + 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58, + 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58, + 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48, + 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48, + 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048, + 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148, + 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248, + 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348, + 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c, + 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c, + 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c, + 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c, + 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c, + 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c, + 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50, + 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50, + 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050, + 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150, + 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250, + 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350, + 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354, + 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254, + 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154, + 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054, + 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54, + 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54, + 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58, + 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058, + 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258, + 0x1824, 0x3310, 0x3f34, 0x0c00, 0x2448, + 0x1924, 0x3410, 0x4034, 0x0d00, 0x2548, + 0x1a24, 0x3510, 0x4134, 0x0e00, 0x2648, + 0x1b24, 0x3514, 0x4138, 0x0f00, 0x2748, + 0x1c24, 0x3414, 0x4038, 0x1000, 0x2848, + 0x1d24, 0x3314, 0x3f38, 0x1100, 0x2948, + 0x1d28, 0x3214, 0x3e38, 0x1104, 0x294c, + 0x1c28, 0x3114, 0x3d38, 0x1004, 0x284c, + 0x1b28, 0x3014, 0x3c38, 0x0f04, 0x274c, + 0x1a28, 0x3018, 0x3c3c, 0x0e04, 0x264c, + 0x1928, 0x3118, 0x3d3c, 0x0d04, 0x254c, + 0x1828, 0x3218, 0x3e3c, 0x0c04, 0x244c, + 0x182c, 0x3318, 0x3f3c, 0x0c08, 0x2450, + 0x192c, 0x3418, 
0x403c, 0x0d08, 0x2550, + 0x1a2c, 0x3518, 0x413c, 0x0e08, 0x2650, + 0x1b2c, 0x351c, 0x4140, 0x0f08, 0x2750, + 0x1c2c, 0x341c, 0x4040, 0x1008, 0x2850, + 0x1d2c, 0x331c, 0x3f40, 0x1108, 0x2950, + 0x1d30, 0x321c, 0x3e40, 0x110c, 0x2954, + 0x1c30, 0x311c, 0x3d40, 0x100c, 0x2854, + 0x1b30, 0x301c, 0x3c40, 0x0f0c, 0x2754, + 0x1a30, 0x3020, 0x3c44, 0x0e0c, 0x2654, + 0x1930, 0x3120, 0x3d44, 0x0d0c, 0x2554, + 0x1830, 0x3220, 0x3e44, 0x0c0c, 0x2454, + 0x1834, 0x3320, 0x3f44, 0x0c10, 0x2458, + 0x1934, 0x3420, 0x4044, 0x0d10, 0x2658, + 0x1a34, 0x3520, 0x4144, 0x0e10, 0x2858, + 0x1e24, 0x3910, 0x4534, 0x1200, 0x2a48, + 0x1f24, 0x3a10, 0x4634, 0x1300, 0x2b48, + 0x2024, 0x3b10, 0x4734, 0x1400, 0x2c48, + 0x2124, 0x3b14, 0x4738, 0x1500, 0x2d48, + 0x2224, 0x3a14, 0x4638, 0x1600, 0x2e48, + 0x2324, 0x3914, 0x4538, 0x1700, 0x2f48, + 0x2328, 0x3814, 0x4438, 0x1704, 0x2f4c, + 0x2228, 0x3714, 0x4338, 0x1604, 0x2e4c, + 0x2128, 0x3614, 0x4238, 0x1504, 0x2d4c, + 0x2028, 0x3618, 0x423c, 0x1404, 0x2c4c, + 0x1f28, 0x3718, 0x433c, 0x1304, 0x2b4c, + 0x1e28, 0x3818, 0x443c, 0x1204, 0x2a4c, + 0x1e2c, 0x3918, 0x453c, 0x1208, 0x2a50, + 0x1f2c, 0x3a18, 0x463c, 0x1308, 0x2b50, + 0x202c, 0x3b18, 0x473c, 0x1408, 0x2c50, + 0x212c, 0x3b1c, 0x4740, 0x1508, 0x2d50, + 0x222c, 0x3a1c, 0x4640, 0x1608, 0x2e50, + 0x232c, 0x391c, 0x4540, 0x1708, 0x2f50, + 0x2330, 0x381c, 0x4440, 0x170c, 0x2f54, + 0x2230, 0x371c, 0x4340, 0x160c, 0x2e54, + 0x2130, 0x361c, 0x4240, 0x150c, 0x2d54, + 0x2030, 0x3620, 0x4244, 0x140c, 0x2c54, + 0x1f30, 0x3720, 0x4344, 0x130c, 0x2b54, + 0x1e30, 0x3820, 0x4444, 0x120c, 0x2a54, + 0x1e34, 0x3920, 0x4544, 0x1210, 0x2a58, + 0x1f34, 0x3a20, 0x4644, 0x1310, 0x2c58, + 0x2034, 0x3b20, 0x4744, 0x1410, 0x2e58, + 0x2424, 0x3f10, 0x0334, 0x1800, 0x3048, + 0x2524, 0x4010, 0x0434, 0x1900, 0x3148, + 0x2624, 0x4110, 0x0534, 0x1a00, 0x3248, + 0x2724, 0x4114, 0x0538, 0x1b00, 0x3348, + 0x2824, 0x4014, 0x0438, 0x1c00, 0x3448, + 0x2924, 0x3f14, 0x0338, 0x1d00, 0x3548, + 0x2928, 0x3e14, 0x0238, 0x1d04, 0x354c, + 
0x2828, 0x3d14, 0x0138, 0x1c04, 0x344c, + 0x2728, 0x3c14, 0x0038, 0x1b04, 0x334c, + 0x2628, 0x3c18, 0x003c, 0x1a04, 0x324c, + 0x2528, 0x3d18, 0x013c, 0x1904, 0x314c, + 0x2428, 0x3e18, 0x023c, 0x1804, 0x304c, + 0x242c, 0x3f18, 0x033c, 0x1808, 0x3050, + 0x252c, 0x4018, 0x043c, 0x1908, 0x3150, + 0x262c, 0x4118, 0x053c, 0x1a08, 0x3250, + 0x272c, 0x411c, 0x0540, 0x1b08, 0x3350, + 0x282c, 0x401c, 0x0440, 0x1c08, 0x3450, + 0x292c, 0x3f1c, 0x0340, 0x1d08, 0x3550, + 0x2930, 0x3e1c, 0x0240, 0x1d0c, 0x3554, + 0x2830, 0x3d1c, 0x0140, 0x1c0c, 0x3454, + 0x2730, 0x3c1c, 0x0040, 0x1b0c, 0x3354, + 0x2630, 0x3c20, 0x0044, 0x1a0c, 0x3254, + 0x2530, 0x3d20, 0x0144, 0x190c, 0x3154, + 0x2430, 0x3e20, 0x0244, 0x180c, 0x3054, + 0x2434, 0x3f20, 0x0344, 0x1810, 0x3058, + 0x2534, 0x4020, 0x0444, 0x1910, 0x3258, + 0x2634, 0x4120, 0x0544, 0x1a10, 0x3458, + 0x2a24, 0x4510, 0x0934, 0x1e00, 0x3648, + 0x2b24, 0x4610, 0x0a34, 0x1f00, 0x3748, + 0x2c24, 0x4710, 0x0b34, 0x2000, 0x3848, + 0x2d24, 0x4714, 0x0b38, 0x2100, 0x3948, + 0x2e24, 0x4614, 0x0a38, 0x2200, 0x3a48, + 0x2f24, 0x4514, 0x0938, 0x2300, 0x3b48, + 0x2f28, 0x4414, 0x0838, 0x2304, 0x3b4c, + 0x2e28, 0x4314, 0x0738, 0x2204, 0x3a4c, + 0x2d28, 0x4214, 0x0638, 0x2104, 0x394c, + 0x2c28, 0x4218, 0x063c, 0x2004, 0x384c, + 0x2b28, 0x4318, 0x073c, 0x1f04, 0x374c, + 0x2a28, 0x4418, 0x083c, 0x1e04, 0x364c, + 0x2a2c, 0x4518, 0x093c, 0x1e08, 0x3650, + 0x2b2c, 0x4618, 0x0a3c, 0x1f08, 0x3750, + 0x2c2c, 0x4718, 0x0b3c, 0x2008, 0x3850, + 0x2d2c, 0x471c, 0x0b40, 0x2108, 0x3950, + 0x2e2c, 0x461c, 0x0a40, 0x2208, 0x3a50, + 0x2f2c, 0x451c, 0x0940, 0x2308, 0x3b50, + 0x2f30, 0x441c, 0x0840, 0x230c, 0x3b54, + 0x2e30, 0x431c, 0x0740, 0x220c, 0x3a54, + 0x2d30, 0x421c, 0x0640, 0x210c, 0x3954, + 0x2c30, 0x4220, 0x0644, 0x200c, 0x3854, + 0x2b30, 0x4320, 0x0744, 0x1f0c, 0x3754, + 0x2a30, 0x4420, 0x0844, 0x1e0c, 0x3654, + 0x2a34, 0x4520, 0x0944, 0x1e10, 0x3658, + 0x2b34, 0x4620, 0x0a44, 0x1f10, 0x3858, + 0x2c34, 0x4720, 0x0b44, 0x2010, 0x3a58, + 0x3024, 0x0310, 0x0f34, 
0x2400, 0x3c48, + 0x3124, 0x0410, 0x1034, 0x2500, 0x3d48, + 0x3224, 0x0510, 0x1134, 0x2600, 0x3e48, + 0x3324, 0x0514, 0x1138, 0x2700, 0x3f48, + 0x3424, 0x0414, 0x1038, 0x2800, 0x4048, + 0x3524, 0x0314, 0x0f38, 0x2900, 0x4148, + 0x3528, 0x0214, 0x0e38, 0x2904, 0x414c, + 0x3428, 0x0114, 0x0d38, 0x2804, 0x404c, + 0x3328, 0x0014, 0x0c38, 0x2704, 0x3f4c, + 0x3228, 0x0018, 0x0c3c, 0x2604, 0x3e4c, + 0x3128, 0x0118, 0x0d3c, 0x2504, 0x3d4c, + 0x3028, 0x0218, 0x0e3c, 0x2404, 0x3c4c, + 0x302c, 0x0318, 0x0f3c, 0x2408, 0x3c50, + 0x312c, 0x0418, 0x103c, 0x2508, 0x3d50, + 0x322c, 0x0518, 0x113c, 0x2608, 0x3e50, + 0x332c, 0x051c, 0x1140, 0x2708, 0x3f50, + 0x342c, 0x041c, 0x1040, 0x2808, 0x4050, + 0x352c, 0x031c, 0x0f40, 0x2908, 0x4150, + 0x3530, 0x021c, 0x0e40, 0x290c, 0x4154, + 0x3430, 0x011c, 0x0d40, 0x280c, 0x4054, + 0x3330, 0x001c, 0x0c40, 0x270c, 0x3f54, + 0x3230, 0x0020, 0x0c44, 0x260c, 0x3e54, + 0x3130, 0x0120, 0x0d44, 0x250c, 0x3d54, + 0x3030, 0x0220, 0x0e44, 0x240c, 0x3c54, + 0x3034, 0x0320, 0x0f44, 0x2410, 0x3c58, + 0x3134, 0x0420, 0x1044, 0x2510, 0x3e58, + 0x3234, 0x0520, 0x1144, 0x2610, 0x4058, + 0x3624, 0x0910, 0x1534, 0x2a00, 0x4248, + 0x3724, 0x0a10, 0x1634, 0x2b00, 0x4348, + 0x3824, 0x0b10, 0x1734, 0x2c00, 0x4448, + 0x3924, 0x0b14, 0x1738, 0x2d00, 0x4548, + 0x3a24, 0x0a14, 0x1638, 0x2e00, 0x4648, + 0x3b24, 0x0914, 0x1538, 0x2f00, 0x4748, + 0x3b28, 0x0814, 0x1438, 0x2f04, 0x474c, + 0x3a28, 0x0714, 0x1338, 0x2e04, 0x464c, + 0x3928, 0x0614, 0x1238, 0x2d04, 0x454c, + 0x3828, 0x0618, 0x123c, 0x2c04, 0x444c, + 0x3728, 0x0718, 0x133c, 0x2b04, 0x434c, + 0x3628, 0x0818, 0x143c, 0x2a04, 0x424c, + 0x362c, 0x0918, 0x153c, 0x2a08, 0x4250, + 0x372c, 0x0a18, 0x163c, 0x2b08, 0x4350, + 0x382c, 0x0b18, 0x173c, 0x2c08, 0x4450, + 0x392c, 0x0b1c, 0x1740, 0x2d08, 0x4550, + 0x3a2c, 0x0a1c, 0x1640, 0x2e08, 0x4650, + 0x3b2c, 0x091c, 0x1540, 0x2f08, 0x4750, + 0x3b30, 0x081c, 0x1440, 0x2f0c, 0x4754, + 0x3a30, 0x071c, 0x1340, 0x2e0c, 0x4654, + 0x3930, 0x061c, 0x1240, 0x2d0c, 0x4554, + 0x3830, 
0x0620, 0x1244, 0x2c0c, 0x4454, + 0x3730, 0x0720, 0x1344, 0x2b0c, 0x4354, + 0x3630, 0x0820, 0x1444, 0x2a0c, 0x4254, + 0x3634, 0x0920, 0x1544, 0x2a10, 0x4258, + 0x3734, 0x0a20, 0x1644, 0x2b10, 0x4458, + 0x3834, 0x0b20, 0x1744, 0x2c10, 0x4658, + 0x3c24, 0x0f10, 0x1b34, 0x3000, 0x0048, + 0x3d24, 0x1010, 0x1c34, 0x3100, 0x0148, + 0x3e24, 0x1110, 0x1d34, 0x3200, 0x0248, + 0x3f24, 0x1114, 0x1d38, 0x3300, 0x0348, + 0x4024, 0x1014, 0x1c38, 0x3400, 0x0448, + 0x4124, 0x0f14, 0x1b38, 0x3500, 0x0548, + 0x4128, 0x0e14, 0x1a38, 0x3504, 0x054c, + 0x4028, 0x0d14, 0x1938, 0x3404, 0x044c, + 0x3f28, 0x0c14, 0x1838, 0x3304, 0x034c, + 0x3e28, 0x0c18, 0x183c, 0x3204, 0x024c, + 0x3d28, 0x0d18, 0x193c, 0x3104, 0x014c, + 0x3c28, 0x0e18, 0x1a3c, 0x3004, 0x004c, + 0x3c2c, 0x0f18, 0x1b3c, 0x3008, 0x0050, + 0x3d2c, 0x1018, 0x1c3c, 0x3108, 0x0150, + 0x3e2c, 0x1118, 0x1d3c, 0x3208, 0x0250, + 0x3f2c, 0x111c, 0x1d40, 0x3308, 0x0350, + 0x402c, 0x101c, 0x1c40, 0x3408, 0x0450, + 0x412c, 0x0f1c, 0x1b40, 0x3508, 0x0550, + 0x4130, 0x0e1c, 0x1a40, 0x350c, 0x0554, + 0x4030, 0x0d1c, 0x1940, 0x340c, 0x0454, + 0x3f30, 0x0c1c, 0x1840, 0x330c, 0x0354, + 0x3e30, 0x0c20, 0x1844, 0x320c, 0x0254, + 0x3d30, 0x0d20, 0x1944, 0x310c, 0x0154, + 0x3c30, 0x0e20, 0x1a44, 0x300c, 0x0054, + 0x3c34, 0x0f20, 0x1b44, 0x3010, 0x0058, + 0x3d34, 0x1020, 0x1c44, 0x3110, 0x0258, + 0x3e34, 0x1120, 0x1d44, 0x3210, 0x0458, + 0x4224, 0x1510, 0x2134, 0x3600, 0x0648, + 0x4324, 0x1610, 0x2234, 0x3700, 0x0748, + 0x4424, 0x1710, 0x2334, 0x3800, 0x0848, + 0x4524, 0x1714, 0x2338, 0x3900, 0x0948, + 0x4624, 0x1614, 0x2238, 0x3a00, 0x0a48, + 0x4724, 0x1514, 0x2138, 0x3b00, 0x0b48, + 0x4728, 0x1414, 0x2038, 0x3b04, 0x0b4c, + 0x4628, 0x1314, 0x1f38, 0x3a04, 0x0a4c, + 0x4528, 0x1214, 0x1e38, 0x3904, 0x094c, + 0x4428, 0x1218, 0x1e3c, 0x3804, 0x084c, + 0x4328, 0x1318, 0x1f3c, 0x3704, 0x074c, + 0x4228, 0x1418, 0x203c, 0x3604, 0x064c, + 0x422c, 0x1518, 0x213c, 0x3608, 0x0650, + 0x432c, 0x1618, 0x223c, 0x3708, 0x0750, + 0x442c, 0x1718, 0x233c, 0x3808, 
0x0850, + 0x452c, 0x171c, 0x2340, 0x3908, 0x0950, + 0x462c, 0x161c, 0x2240, 0x3a08, 0x0a50, + 0x472c, 0x151c, 0x2140, 0x3b08, 0x0b50, + 0x4730, 0x141c, 0x2040, 0x3b0c, 0x0b54, + 0x4630, 0x131c, 0x1f40, 0x3a0c, 0x0a54, + 0x4530, 0x121c, 0x1e40, 0x390c, 0x0954, + 0x4430, 0x1220, 0x1e44, 0x380c, 0x0854, + 0x4330, 0x1320, 0x1f44, 0x370c, 0x0754, + 0x4230, 0x1420, 0x2044, 0x360c, 0x0654, + 0x4234, 0x1520, 0x2144, 0x3610, 0x0658, + 0x4334, 0x1620, 0x2244, 0x3710, 0x0858, + 0x4434, 0x1720, 0x2344, 0x3810, 0x0a58, + 0x0024, 0x1b10, 0x2734, 0x3c00, 0x0c48, + 0x0124, 0x1c10, 0x2834, 0x3d00, 0x0d48, + 0x0224, 0x1d10, 0x2934, 0x3e00, 0x0e48, + 0x0324, 0x1d14, 0x2938, 0x3f00, 0x0f48, + 0x0424, 0x1c14, 0x2838, 0x4000, 0x1048, + 0x0524, 0x1b14, 0x2738, 0x4100, 0x1148, + 0x0528, 0x1a14, 0x2638, 0x4104, 0x114c, + 0x0428, 0x1914, 0x2538, 0x4004, 0x104c, + 0x0328, 0x1814, 0x2438, 0x3f04, 0x0f4c, + 0x0228, 0x1818, 0x243c, 0x3e04, 0x0e4c, + 0x0128, 0x1918, 0x253c, 0x3d04, 0x0d4c, + 0x0028, 0x1a18, 0x263c, 0x3c04, 0x0c4c, + 0x002c, 0x1b18, 0x273c, 0x3c08, 0x0c50, + 0x012c, 0x1c18, 0x283c, 0x3d08, 0x0d50, + 0x022c, 0x1d18, 0x293c, 0x3e08, 0x0e50, + 0x032c, 0x1d1c, 0x2940, 0x3f08, 0x0f50, + 0x042c, 0x1c1c, 0x2840, 0x4008, 0x1050, + 0x052c, 0x1b1c, 0x2740, 0x4108, 0x1150, + 0x0530, 0x1a1c, 0x2640, 0x410c, 0x1154, + 0x0430, 0x191c, 0x2540, 0x400c, 0x1054, + 0x0330, 0x181c, 0x2440, 0x3f0c, 0x0f54, + 0x0230, 0x1820, 0x2444, 0x3e0c, 0x0e54, + 0x0130, 0x1920, 0x2544, 0x3d0c, 0x0d54, + 0x0030, 0x1a20, 0x2644, 0x3c0c, 0x0c54, + 0x0034, 0x1b20, 0x2744, 0x3c10, 0x0c58, + 0x0134, 0x1c20, 0x2844, 0x3d10, 0x0e58, + 0x0234, 0x1d20, 0x2944, 0x3e10, 0x1058, + 0x0624, 0x2110, 0x2d34, 0x4200, 0x1248, + 0x0724, 0x2210, 0x2e34, 0x4300, 0x1348, + 0x0824, 0x2310, 0x2f34, 0x4400, 0x1448, + 0x0924, 0x2314, 0x2f38, 0x4500, 0x1548, + 0x0a24, 0x2214, 0x2e38, 0x4600, 0x1648, + 0x0b24, 0x2114, 0x2d38, 0x4700, 0x1748, + 0x0b28, 0x2014, 0x2c38, 0x4704, 0x174c, + 0x0a28, 0x1f14, 0x2b38, 0x4604, 0x164c, + 0x0928, 0x1e14, 
0x2a38, 0x4504, 0x154c, + 0x0828, 0x1e18, 0x2a3c, 0x4404, 0x144c, + 0x0728, 0x1f18, 0x2b3c, 0x4304, 0x134c, + 0x0628, 0x2018, 0x2c3c, 0x4204, 0x124c, + 0x062c, 0x2118, 0x2d3c, 0x4208, 0x1250, + 0x072c, 0x2218, 0x2e3c, 0x4308, 0x1350, + 0x082c, 0x2318, 0x2f3c, 0x4408, 0x1450, + 0x092c, 0x231c, 0x2f40, 0x4508, 0x1550, + 0x0a2c, 0x221c, 0x2e40, 0x4608, 0x1650, + 0x0b2c, 0x211c, 0x2d40, 0x4708, 0x1750, + 0x0b30, 0x201c, 0x2c40, 0x470c, 0x1754, + 0x0a30, 0x1f1c, 0x2b40, 0x460c, 0x1654, + 0x0930, 0x1e1c, 0x2a40, 0x450c, 0x1554, + 0x0830, 0x1e20, 0x2a44, 0x440c, 0x1454, + 0x0730, 0x1f20, 0x2b44, 0x430c, 0x1354, + 0x0630, 0x2020, 0x2c44, 0x420c, 0x1254, + 0x0634, 0x2120, 0x2d44, 0x4210, 0x1258, + 0x0734, 0x2220, 0x2e44, 0x4310, 0x1458, + 0x0834, 0x2320, 0x2f44, 0x4410, 0x1658, +}; + +static const uint16_t dv_place_411[1350] = { + 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848, + 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948, + 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48, + 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48, + 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48, + 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48, + 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c, + 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c, + 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c, + 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c, + 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c, + 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c, + 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850, + 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950, + 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50, + 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50, + 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50, + 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50, + 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54, + 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54, + 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54, + 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54, + 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954, + 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854, + 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858, + 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58, + 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58, + 0x1224, 0x2d10, 0x3934, 0x0600, 
0x1e48, + 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48, + 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048, + 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148, + 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248, + 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348, + 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c, + 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c, + 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c, + 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c, + 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c, + 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c, + 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50, + 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50, + 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050, + 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150, + 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250, + 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350, + 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354, + 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254, + 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154, + 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054, + 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54, + 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54, + 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58, + 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058, + 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258, + 0x1824, 0x3310, 0x0334, 0x0c00, 0x2448, + 0x1924, 0x3410, 0x0434, 0x0d00, 0x2548, + 0x1a24, 0x3510, 0x0534, 0x0e00, 0x2648, + 0x1b24, 0x3514, 0x0538, 0x0f00, 0x2748, + 0x1c24, 0x3414, 0x0438, 0x1000, 0x2848, + 0x1d24, 0x3314, 0x0338, 0x1100, 0x2948, + 0x1d28, 0x3214, 0x0238, 0x1104, 0x294c, + 0x1c28, 0x3114, 0x0138, 0x1004, 0x284c, + 0x1b28, 0x3014, 0x0038, 0x0f04, 0x274c, + 0x1a28, 0x3018, 0x003c, 0x0e04, 0x264c, + 0x1928, 0x3118, 0x013c, 0x0d04, 0x254c, + 0x1828, 0x3218, 0x023c, 0x0c04, 0x244c, + 0x182c, 0x3318, 0x033c, 0x0c08, 0x2450, + 0x192c, 0x3418, 0x043c, 0x0d08, 0x2550, + 0x1a2c, 0x3518, 0x053c, 0x0e08, 0x2650, + 0x1b2c, 0x351c, 0x0540, 0x0f08, 0x2750, + 0x1c2c, 0x341c, 0x0440, 0x1008, 0x2850, + 0x1d2c, 0x331c, 0x0340, 0x1108, 0x2950, + 0x1d30, 0x321c, 0x0240, 0x110c, 0x2954, + 0x1c30, 0x311c, 0x0140, 0x100c, 0x2854, + 0x1b30, 0x301c, 0x0040, 0x0f0c, 0x2754, + 0x1a30, 0x3020, 
0x0044, 0x0e0c, 0x2654, + 0x1930, 0x3120, 0x0144, 0x0d0c, 0x2554, + 0x1830, 0x3220, 0x0244, 0x0c0c, 0x2454, + 0x1834, 0x3320, 0x0344, 0x0c10, 0x2458, + 0x1934, 0x3420, 0x0444, 0x0d10, 0x2658, + 0x1a34, 0x3520, 0x0544, 0x0e10, 0x2858, + 0x1e24, 0x3910, 0x0934, 0x1200, 0x2a48, + 0x1f24, 0x3a10, 0x0a34, 0x1300, 0x2b48, + 0x2024, 0x3b10, 0x0b34, 0x1400, 0x2c48, + 0x2124, 0x3b14, 0x0b38, 0x1500, 0x2d48, + 0x2224, 0x3a14, 0x0a38, 0x1600, 0x2e48, + 0x2324, 0x3914, 0x0938, 0x1700, 0x2f48, + 0x2328, 0x3814, 0x0838, 0x1704, 0x2f4c, + 0x2228, 0x3714, 0x0738, 0x1604, 0x2e4c, + 0x2128, 0x3614, 0x0638, 0x1504, 0x2d4c, + 0x2028, 0x3618, 0x063c, 0x1404, 0x2c4c, + 0x1f28, 0x3718, 0x073c, 0x1304, 0x2b4c, + 0x1e28, 0x3818, 0x083c, 0x1204, 0x2a4c, + 0x1e2c, 0x3918, 0x093c, 0x1208, 0x2a50, + 0x1f2c, 0x3a18, 0x0a3c, 0x1308, 0x2b50, + 0x202c, 0x3b18, 0x0b3c, 0x1408, 0x2c50, + 0x212c, 0x3b1c, 0x0b40, 0x1508, 0x2d50, + 0x222c, 0x3a1c, 0x0a40, 0x1608, 0x2e50, + 0x232c, 0x391c, 0x0940, 0x1708, 0x2f50, + 0x2330, 0x381c, 0x0840, 0x170c, 0x2f54, + 0x2230, 0x371c, 0x0740, 0x160c, 0x2e54, + 0x2130, 0x361c, 0x0640, 0x150c, 0x2d54, + 0x2030, 0x3620, 0x0644, 0x140c, 0x2c54, + 0x1f30, 0x3720, 0x0744, 0x130c, 0x2b54, + 0x1e30, 0x3820, 0x0844, 0x120c, 0x2a54, + 0x1e34, 0x3920, 0x0944, 0x1210, 0x2a58, + 0x1f34, 0x3a20, 0x0a44, 0x1310, 0x2c58, + 0x2034, 0x3b20, 0x0b44, 0x1410, 0x2e58, + 0x2424, 0x0310, 0x0f34, 0x1800, 0x3048, + 0x2524, 0x0410, 0x1034, 0x1900, 0x3148, + 0x2624, 0x0510, 0x1134, 0x1a00, 0x3248, + 0x2724, 0x0514, 0x1138, 0x1b00, 0x3348, + 0x2824, 0x0414, 0x1038, 0x1c00, 0x3448, + 0x2924, 0x0314, 0x0f38, 0x1d00, 0x3548, + 0x2928, 0x0214, 0x0e38, 0x1d04, 0x354c, + 0x2828, 0x0114, 0x0d38, 0x1c04, 0x344c, + 0x2728, 0x0014, 0x0c38, 0x1b04, 0x334c, + 0x2628, 0x0018, 0x0c3c, 0x1a04, 0x324c, + 0x2528, 0x0118, 0x0d3c, 0x1904, 0x314c, + 0x2428, 0x0218, 0x0e3c, 0x1804, 0x304c, + 0x242c, 0x0318, 0x0f3c, 0x1808, 0x3050, + 0x252c, 0x0418, 0x103c, 0x1908, 0x3150, + 0x262c, 0x0518, 0x113c, 0x1a08, 0x3250, + 
0x272c, 0x051c, 0x1140, 0x1b08, 0x3350, + 0x282c, 0x041c, 0x1040, 0x1c08, 0x3450, + 0x292c, 0x031c, 0x0f40, 0x1d08, 0x3550, + 0x2930, 0x021c, 0x0e40, 0x1d0c, 0x3554, + 0x2830, 0x011c, 0x0d40, 0x1c0c, 0x3454, + 0x2730, 0x001c, 0x0c40, 0x1b0c, 0x3354, + 0x2630, 0x0020, 0x0c44, 0x1a0c, 0x3254, + 0x2530, 0x0120, 0x0d44, 0x190c, 0x3154, + 0x2430, 0x0220, 0x0e44, 0x180c, 0x3054, + 0x2434, 0x0320, 0x0f44, 0x1810, 0x3058, + 0x2534, 0x0420, 0x1044, 0x1910, 0x3258, + 0x2634, 0x0520, 0x1144, 0x1a10, 0x3458, + 0x2a24, 0x0910, 0x1534, 0x1e00, 0x3648, + 0x2b24, 0x0a10, 0x1634, 0x1f00, 0x3748, + 0x2c24, 0x0b10, 0x1734, 0x2000, 0x3848, + 0x2d24, 0x0b14, 0x1738, 0x2100, 0x3948, + 0x2e24, 0x0a14, 0x1638, 0x2200, 0x3a48, + 0x2f24, 0x0914, 0x1538, 0x2300, 0x3b48, + 0x2f28, 0x0814, 0x1438, 0x2304, 0x3b4c, + 0x2e28, 0x0714, 0x1338, 0x2204, 0x3a4c, + 0x2d28, 0x0614, 0x1238, 0x2104, 0x394c, + 0x2c28, 0x0618, 0x123c, 0x2004, 0x384c, + 0x2b28, 0x0718, 0x133c, 0x1f04, 0x374c, + 0x2a28, 0x0818, 0x143c, 0x1e04, 0x364c, + 0x2a2c, 0x0918, 0x153c, 0x1e08, 0x3650, + 0x2b2c, 0x0a18, 0x163c, 0x1f08, 0x3750, + 0x2c2c, 0x0b18, 0x173c, 0x2008, 0x3850, + 0x2d2c, 0x0b1c, 0x1740, 0x2108, 0x3950, + 0x2e2c, 0x0a1c, 0x1640, 0x2208, 0x3a50, + 0x2f2c, 0x091c, 0x1540, 0x2308, 0x3b50, + 0x2f30, 0x081c, 0x1440, 0x230c, 0x3b54, + 0x2e30, 0x071c, 0x1340, 0x220c, 0x3a54, + 0x2d30, 0x061c, 0x1240, 0x210c, 0x3954, + 0x2c30, 0x0620, 0x1244, 0x200c, 0x3854, + 0x2b30, 0x0720, 0x1344, 0x1f0c, 0x3754, + 0x2a30, 0x0820, 0x1444, 0x1e0c, 0x3654, + 0x2a34, 0x0920, 0x1544, 0x1e10, 0x3658, + 0x2b34, 0x0a20, 0x1644, 0x1f10, 0x3858, + 0x2c34, 0x0b20, 0x1744, 0x2010, 0x3a58, + 0x3024, 0x0f10, 0x1b34, 0x2400, 0x0048, + 0x3124, 0x1010, 0x1c34, 0x2500, 0x0148, + 0x3224, 0x1110, 0x1d34, 0x2600, 0x0248, + 0x3324, 0x1114, 0x1d38, 0x2700, 0x0348, + 0x3424, 0x1014, 0x1c38, 0x2800, 0x0448, + 0x3524, 0x0f14, 0x1b38, 0x2900, 0x0548, + 0x3528, 0x0e14, 0x1a38, 0x2904, 0x054c, + 0x3428, 0x0d14, 0x1938, 0x2804, 0x044c, + 0x3328, 0x0c14, 0x1838, 
0x2704, 0x034c, + 0x3228, 0x0c18, 0x183c, 0x2604, 0x024c, + 0x3128, 0x0d18, 0x193c, 0x2504, 0x014c, + 0x3028, 0x0e18, 0x1a3c, 0x2404, 0x004c, + 0x302c, 0x0f18, 0x1b3c, 0x2408, 0x0050, + 0x312c, 0x1018, 0x1c3c, 0x2508, 0x0150, + 0x322c, 0x1118, 0x1d3c, 0x2608, 0x0250, + 0x332c, 0x111c, 0x1d40, 0x2708, 0x0350, + 0x342c, 0x101c, 0x1c40, 0x2808, 0x0450, + 0x352c, 0x0f1c, 0x1b40, 0x2908, 0x0550, + 0x3530, 0x0e1c, 0x1a40, 0x290c, 0x0554, + 0x3430, 0x0d1c, 0x1940, 0x280c, 0x0454, + 0x3330, 0x0c1c, 0x1840, 0x270c, 0x0354, + 0x3230, 0x0c20, 0x1844, 0x260c, 0x0254, + 0x3130, 0x0d20, 0x1944, 0x250c, 0x0154, + 0x3030, 0x0e20, 0x1a44, 0x240c, 0x0054, + 0x3034, 0x0f20, 0x1b44, 0x2410, 0x0058, + 0x3134, 0x1020, 0x1c44, 0x2510, 0x0258, + 0x3234, 0x1120, 0x1d44, 0x2610, 0x0458, + 0x3624, 0x1510, 0x2134, 0x2a00, 0x0648, + 0x3724, 0x1610, 0x2234, 0x2b00, 0x0748, + 0x3824, 0x1710, 0x2334, 0x2c00, 0x0848, + 0x3924, 0x1714, 0x2338, 0x2d00, 0x0948, + 0x3a24, 0x1614, 0x2238, 0x2e00, 0x0a48, + 0x3b24, 0x1514, 0x2138, 0x2f00, 0x0b48, + 0x3b28, 0x1414, 0x2038, 0x2f04, 0x0b4c, + 0x3a28, 0x1314, 0x1f38, 0x2e04, 0x0a4c, + 0x3928, 0x1214, 0x1e38, 0x2d04, 0x094c, + 0x3828, 0x1218, 0x1e3c, 0x2c04, 0x084c, + 0x3728, 0x1318, 0x1f3c, 0x2b04, 0x074c, + 0x3628, 0x1418, 0x203c, 0x2a04, 0x064c, + 0x362c, 0x1518, 0x213c, 0x2a08, 0x0650, + 0x372c, 0x1618, 0x223c, 0x2b08, 0x0750, + 0x382c, 0x1718, 0x233c, 0x2c08, 0x0850, + 0x392c, 0x171c, 0x2340, 0x2d08, 0x0950, + 0x3a2c, 0x161c, 0x2240, 0x2e08, 0x0a50, + 0x3b2c, 0x151c, 0x2140, 0x2f08, 0x0b50, + 0x3b30, 0x141c, 0x2040, 0x2f0c, 0x0b54, + 0x3a30, 0x131c, 0x1f40, 0x2e0c, 0x0a54, + 0x3930, 0x121c, 0x1e40, 0x2d0c, 0x0954, + 0x3830, 0x1220, 0x1e44, 0x2c0c, 0x0854, + 0x3730, 0x1320, 0x1f44, 0x2b0c, 0x0754, + 0x3630, 0x1420, 0x2044, 0x2a0c, 0x0654, + 0x3634, 0x1520, 0x2144, 0x2a10, 0x0658, + 0x3734, 0x1620, 0x2244, 0x2b10, 0x0858, + 0x3834, 0x1720, 0x2344, 0x2c10, 0x0a58, + 0x0024, 0x1b10, 0x2734, 0x3000, 0x0c48, + 0x0124, 0x1c10, 0x2834, 0x3100, 0x0d48, + 0x0224, 
0x1d10, 0x2934, 0x3200, 0x0e48, + 0x0324, 0x1d14, 0x2938, 0x3300, 0x0f48, + 0x0424, 0x1c14, 0x2838, 0x3400, 0x1048, + 0x0524, 0x1b14, 0x2738, 0x3500, 0x1148, + 0x0528, 0x1a14, 0x2638, 0x3504, 0x114c, + 0x0428, 0x1914, 0x2538, 0x3404, 0x104c, + 0x0328, 0x1814, 0x2438, 0x3304, 0x0f4c, + 0x0228, 0x1818, 0x243c, 0x3204, 0x0e4c, + 0x0128, 0x1918, 0x253c, 0x3104, 0x0d4c, + 0x0028, 0x1a18, 0x263c, 0x3004, 0x0c4c, + 0x002c, 0x1b18, 0x273c, 0x3008, 0x0c50, + 0x012c, 0x1c18, 0x283c, 0x3108, 0x0d50, + 0x022c, 0x1d18, 0x293c, 0x3208, 0x0e50, + 0x032c, 0x1d1c, 0x2940, 0x3308, 0x0f50, + 0x042c, 0x1c1c, 0x2840, 0x3408, 0x1050, + 0x052c, 0x1b1c, 0x2740, 0x3508, 0x1150, + 0x0530, 0x1a1c, 0x2640, 0x350c, 0x1154, + 0x0430, 0x191c, 0x2540, 0x340c, 0x1054, + 0x0330, 0x181c, 0x2440, 0x330c, 0x0f54, + 0x0230, 0x1820, 0x2444, 0x320c, 0x0e54, + 0x0130, 0x1920, 0x2544, 0x310c, 0x0d54, + 0x0030, 0x1a20, 0x2644, 0x300c, 0x0c54, + 0x0034, 0x1b20, 0x2744, 0x3010, 0x0c58, + 0x0134, 0x1c20, 0x2844, 0x3110, 0x0e58, + 0x0234, 0x1d20, 0x2944, 0x3210, 0x1058, + 0x0624, 0x2110, 0x2d34, 0x3600, 0x1248, + 0x0724, 0x2210, 0x2e34, 0x3700, 0x1348, + 0x0824, 0x2310, 0x2f34, 0x3800, 0x1448, + 0x0924, 0x2314, 0x2f38, 0x3900, 0x1548, + 0x0a24, 0x2214, 0x2e38, 0x3a00, 0x1648, + 0x0b24, 0x2114, 0x2d38, 0x3b00, 0x1748, + 0x0b28, 0x2014, 0x2c38, 0x3b04, 0x174c, + 0x0a28, 0x1f14, 0x2b38, 0x3a04, 0x164c, + 0x0928, 0x1e14, 0x2a38, 0x3904, 0x154c, + 0x0828, 0x1e18, 0x2a3c, 0x3804, 0x144c, + 0x0728, 0x1f18, 0x2b3c, 0x3704, 0x134c, + 0x0628, 0x2018, 0x2c3c, 0x3604, 0x124c, + 0x062c, 0x2118, 0x2d3c, 0x3608, 0x1250, + 0x072c, 0x2218, 0x2e3c, 0x3708, 0x1350, + 0x082c, 0x2318, 0x2f3c, 0x3808, 0x1450, + 0x092c, 0x231c, 0x2f40, 0x3908, 0x1550, + 0x0a2c, 0x221c, 0x2e40, 0x3a08, 0x1650, + 0x0b2c, 0x211c, 0x2d40, 0x3b08, 0x1750, + 0x0b30, 0x201c, 0x2c40, 0x3b0c, 0x1754, + 0x0a30, 0x1f1c, 0x2b40, 0x3a0c, 0x1654, + 0x0930, 0x1e1c, 0x2a40, 0x390c, 0x1554, + 0x0830, 0x1e20, 0x2a44, 0x380c, 0x1454, + 0x0730, 0x1f20, 0x2b44, 0x370c, 
0x1354, + 0x0630, 0x2020, 0x2c44, 0x360c, 0x1254, + 0x0634, 0x2120, 0x2d44, 0x3610, 0x1258, + 0x0734, 0x2220, 0x2e44, 0x3710, 0x1458, + 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, +}; + +/* 4:2:2 macroblock placement tables created by dvtables.py */ + +/* 2 channels per frame, 10 DIF sequences per channel, + 27 video segments per DIF sequence, 5 macroblocks per video segment */ +static const uint16_t dv_place_422_525[2*10*27*5] = { + 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890, + 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990, + 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90, + 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94, + 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994, + 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894, + 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898, + 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998, + 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98, + 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c, + 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c, + 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c, + 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0, + 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0, + 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0, + 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4, + 0x0d5c, 0x2538, 0x3180, 0x0114, 0x19a4, + 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4, + 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8, + 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8, + 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8, + 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac, + 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac, + 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac, + 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0, + 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0, + 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0, + 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90, + 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90, + 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090, + 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094, + 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94, + 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94, + 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98, + 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98, + 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098, + 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c, + 0x1354, 
0x2b30, 0x3778, 0x070c, 0x1f9c, + 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c, + 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0, + 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0, + 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0, + 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4, + 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4, + 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4, + 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8, + 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8, + 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8, + 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac, + 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac, + 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac, + 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0, + 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0, + 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0, + 0x1848, 0x3024, 0x006c, 0x0c00, 0x2490, + 0x1948, 0x3124, 0x016c, 0x0d00, 0x2590, + 0x1a48, 0x3224, 0x026c, 0x0e00, 0x2690, + 0x1a4c, 0x3228, 0x0270, 0x0e04, 0x2694, + 0x194c, 0x3128, 0x0170, 0x0d04, 0x2594, + 0x184c, 0x3028, 0x0070, 0x0c04, 0x2494, + 0x1850, 0x302c, 0x0074, 0x0c08, 0x2498, + 0x1950, 0x312c, 0x0174, 0x0d08, 0x2598, + 0x1a50, 0x322c, 0x0274, 0x0e08, 0x2698, + 0x1a54, 0x3230, 0x0278, 0x0e0c, 0x269c, + 0x1954, 0x3130, 0x0178, 0x0d0c, 0x259c, + 0x1854, 0x3030, 0x0078, 0x0c0c, 0x249c, + 0x1858, 0x3034, 0x007c, 0x0c10, 0x24a0, + 0x1958, 0x3134, 0x017c, 0x0d10, 0x25a0, + 0x1a58, 0x3234, 0x027c, 0x0e10, 0x26a0, + 0x1a5c, 0x3238, 0x0280, 0x0e14, 0x26a4, + 0x195c, 0x3138, 0x0180, 0x0d14, 0x25a4, + 0x185c, 0x3038, 0x0080, 0x0c14, 0x24a4, + 0x1860, 0x303c, 0x0084, 0x0c18, 0x24a8, + 0x1960, 0x313c, 0x0184, 0x0d18, 0x25a8, + 0x1a60, 0x323c, 0x0284, 0x0e18, 0x26a8, + 0x1a64, 0x3240, 0x0288, 0x0e1c, 0x26ac, + 0x1964, 0x3140, 0x0188, 0x0d1c, 0x25ac, + 0x1864, 0x3040, 0x0088, 0x0c1c, 0x24ac, + 0x1868, 0x3044, 0x008c, 0x0c20, 0x24b0, + 0x1968, 0x3144, 0x018c, 0x0d20, 0x25b0, + 0x1a68, 0x3244, 0x028c, 0x0e20, 0x26b0, + 0x1e48, 0x3624, 0x066c, 0x1200, 0x2a90, + 0x1f48, 0x3724, 0x076c, 0x1300, 0x2b90, + 0x2048, 0x3824, 0x086c, 0x1400, 0x2c90, + 0x204c, 0x3828, 0x0870, 0x1404, 
0x2c94, + 0x1f4c, 0x3728, 0x0770, 0x1304, 0x2b94, + 0x1e4c, 0x3628, 0x0670, 0x1204, 0x2a94, + 0x1e50, 0x362c, 0x0674, 0x1208, 0x2a98, + 0x1f50, 0x372c, 0x0774, 0x1308, 0x2b98, + 0x2050, 0x382c, 0x0874, 0x1408, 0x2c98, + 0x2054, 0x3830, 0x0878, 0x140c, 0x2c9c, + 0x1f54, 0x3730, 0x0778, 0x130c, 0x2b9c, + 0x1e54, 0x3630, 0x0678, 0x120c, 0x2a9c, + 0x1e58, 0x3634, 0x067c, 0x1210, 0x2aa0, + 0x1f58, 0x3734, 0x077c, 0x1310, 0x2ba0, + 0x2058, 0x3834, 0x087c, 0x1410, 0x2ca0, + 0x205c, 0x3838, 0x0880, 0x1414, 0x2ca4, + 0x1f5c, 0x3738, 0x0780, 0x1314, 0x2ba4, + 0x1e5c, 0x3638, 0x0680, 0x1214, 0x2aa4, + 0x1e60, 0x363c, 0x0684, 0x1218, 0x2aa8, + 0x1f60, 0x373c, 0x0784, 0x1318, 0x2ba8, + 0x2060, 0x383c, 0x0884, 0x1418, 0x2ca8, + 0x2064, 0x3840, 0x0888, 0x141c, 0x2cac, + 0x1f64, 0x3740, 0x0788, 0x131c, 0x2bac, + 0x1e64, 0x3640, 0x0688, 0x121c, 0x2aac, + 0x1e68, 0x3644, 0x068c, 0x1220, 0x2ab0, + 0x1f68, 0x3744, 0x078c, 0x1320, 0x2bb0, + 0x2068, 0x3844, 0x088c, 0x1420, 0x2cb0, + 0x2448, 0x0024, 0x0c6c, 0x1800, 0x3090, + 0x2548, 0x0124, 0x0d6c, 0x1900, 0x3190, + 0x2648, 0x0224, 0x0e6c, 0x1a00, 0x3290, + 0x264c, 0x0228, 0x0e70, 0x1a04, 0x3294, + 0x254c, 0x0128, 0x0d70, 0x1904, 0x3194, + 0x244c, 0x0028, 0x0c70, 0x1804, 0x3094, + 0x2450, 0x002c, 0x0c74, 0x1808, 0x3098, + 0x2550, 0x012c, 0x0d74, 0x1908, 0x3198, + 0x2650, 0x022c, 0x0e74, 0x1a08, 0x3298, + 0x2654, 0x0230, 0x0e78, 0x1a0c, 0x329c, + 0x2554, 0x0130, 0x0d78, 0x190c, 0x319c, + 0x2454, 0x0030, 0x0c78, 0x180c, 0x309c, + 0x2458, 0x0034, 0x0c7c, 0x1810, 0x30a0, + 0x2558, 0x0134, 0x0d7c, 0x1910, 0x31a0, + 0x2658, 0x0234, 0x0e7c, 0x1a10, 0x32a0, + 0x265c, 0x0238, 0x0e80, 0x1a14, 0x32a4, + 0x255c, 0x0138, 0x0d80, 0x1914, 0x31a4, + 0x245c, 0x0038, 0x0c80, 0x1814, 0x30a4, + 0x2460, 0x003c, 0x0c84, 0x1818, 0x30a8, + 0x2560, 0x013c, 0x0d84, 0x1918, 0x31a8, + 0x2660, 0x023c, 0x0e84, 0x1a18, 0x32a8, + 0x2664, 0x0240, 0x0e88, 0x1a1c, 0x32ac, + 0x2564, 0x0140, 0x0d88, 0x191c, 0x31ac, + 0x2464, 0x0040, 0x0c88, 0x181c, 0x30ac, + 0x2468, 0x0044, 
0x0c8c, 0x1820, 0x30b0, + 0x2568, 0x0144, 0x0d8c, 0x1920, 0x31b0, + 0x2668, 0x0244, 0x0e8c, 0x1a20, 0x32b0, + 0x2a48, 0x0624, 0x126c, 0x1e00, 0x3690, + 0x2b48, 0x0724, 0x136c, 0x1f00, 0x3790, + 0x2c48, 0x0824, 0x146c, 0x2000, 0x3890, + 0x2c4c, 0x0828, 0x1470, 0x2004, 0x3894, + 0x2b4c, 0x0728, 0x1370, 0x1f04, 0x3794, + 0x2a4c, 0x0628, 0x1270, 0x1e04, 0x3694, + 0x2a50, 0x062c, 0x1274, 0x1e08, 0x3698, + 0x2b50, 0x072c, 0x1374, 0x1f08, 0x3798, + 0x2c50, 0x082c, 0x1474, 0x2008, 0x3898, + 0x2c54, 0x0830, 0x1478, 0x200c, 0x389c, + 0x2b54, 0x0730, 0x1378, 0x1f0c, 0x379c, + 0x2a54, 0x0630, 0x1278, 0x1e0c, 0x369c, + 0x2a58, 0x0634, 0x127c, 0x1e10, 0x36a0, + 0x2b58, 0x0734, 0x137c, 0x1f10, 0x37a0, + 0x2c58, 0x0834, 0x147c, 0x2010, 0x38a0, + 0x2c5c, 0x0838, 0x1480, 0x2014, 0x38a4, + 0x2b5c, 0x0738, 0x1380, 0x1f14, 0x37a4, + 0x2a5c, 0x0638, 0x1280, 0x1e14, 0x36a4, + 0x2a60, 0x063c, 0x1284, 0x1e18, 0x36a8, + 0x2b60, 0x073c, 0x1384, 0x1f18, 0x37a8, + 0x2c60, 0x083c, 0x1484, 0x2018, 0x38a8, + 0x2c64, 0x0840, 0x1488, 0x201c, 0x38ac, + 0x2b64, 0x0740, 0x1388, 0x1f1c, 0x37ac, + 0x2a64, 0x0640, 0x1288, 0x1e1c, 0x36ac, + 0x2a68, 0x0644, 0x128c, 0x1e20, 0x36b0, + 0x2b68, 0x0744, 0x138c, 0x1f20, 0x37b0, + 0x2c68, 0x0844, 0x148c, 0x2020, 0x38b0, + 0x3048, 0x0c24, 0x186c, 0x2400, 0x0090, + 0x3148, 0x0d24, 0x196c, 0x2500, 0x0190, + 0x3248, 0x0e24, 0x1a6c, 0x2600, 0x0290, + 0x324c, 0x0e28, 0x1a70, 0x2604, 0x0294, + 0x314c, 0x0d28, 0x1970, 0x2504, 0x0194, + 0x304c, 0x0c28, 0x1870, 0x2404, 0x0094, + 0x3050, 0x0c2c, 0x1874, 0x2408, 0x0098, + 0x3150, 0x0d2c, 0x1974, 0x2508, 0x0198, + 0x3250, 0x0e2c, 0x1a74, 0x2608, 0x0298, + 0x3254, 0x0e30, 0x1a78, 0x260c, 0x029c, + 0x3154, 0x0d30, 0x1978, 0x250c, 0x019c, + 0x3054, 0x0c30, 0x1878, 0x240c, 0x009c, + 0x3058, 0x0c34, 0x187c, 0x2410, 0x00a0, + 0x3158, 0x0d34, 0x197c, 0x2510, 0x01a0, + 0x3258, 0x0e34, 0x1a7c, 0x2610, 0x02a0, + 0x325c, 0x0e38, 0x1a80, 0x2614, 0x02a4, + 0x315c, 0x0d38, 0x1980, 0x2514, 0x01a4, + 0x305c, 0x0c38, 0x1880, 0x2414, 0x00a4, + 
0x3060, 0x0c3c, 0x1884, 0x2418, 0x00a8, + 0x3160, 0x0d3c, 0x1984, 0x2518, 0x01a8, + 0x3260, 0x0e3c, 0x1a84, 0x2618, 0x02a8, + 0x3264, 0x0e40, 0x1a88, 0x261c, 0x02ac, + 0x3164, 0x0d40, 0x1988, 0x251c, 0x01ac, + 0x3064, 0x0c40, 0x1888, 0x241c, 0x00ac, + 0x3068, 0x0c44, 0x188c, 0x2420, 0x00b0, + 0x3168, 0x0d44, 0x198c, 0x2520, 0x01b0, + 0x3268, 0x0e44, 0x1a8c, 0x2620, 0x02b0, + 0x3648, 0x1224, 0x1e6c, 0x2a00, 0x0690, + 0x3748, 0x1324, 0x1f6c, 0x2b00, 0x0790, + 0x3848, 0x1424, 0x206c, 0x2c00, 0x0890, + 0x384c, 0x1428, 0x2070, 0x2c04, 0x0894, + 0x374c, 0x1328, 0x1f70, 0x2b04, 0x0794, + 0x364c, 0x1228, 0x1e70, 0x2a04, 0x0694, + 0x3650, 0x122c, 0x1e74, 0x2a08, 0x0698, + 0x3750, 0x132c, 0x1f74, 0x2b08, 0x0798, + 0x3850, 0x142c, 0x2074, 0x2c08, 0x0898, + 0x3854, 0x1430, 0x2078, 0x2c0c, 0x089c, + 0x3754, 0x1330, 0x1f78, 0x2b0c, 0x079c, + 0x3654, 0x1230, 0x1e78, 0x2a0c, 0x069c, + 0x3658, 0x1234, 0x1e7c, 0x2a10, 0x06a0, + 0x3758, 0x1334, 0x1f7c, 0x2b10, 0x07a0, + 0x3858, 0x1434, 0x207c, 0x2c10, 0x08a0, + 0x385c, 0x1438, 0x2080, 0x2c14, 0x08a4, + 0x375c, 0x1338, 0x1f80, 0x2b14, 0x07a4, + 0x365c, 0x1238, 0x1e80, 0x2a14, 0x06a4, + 0x3660, 0x123c, 0x1e84, 0x2a18, 0x06a8, + 0x3760, 0x133c, 0x1f84, 0x2b18, 0x07a8, + 0x3860, 0x143c, 0x2084, 0x2c18, 0x08a8, + 0x3864, 0x1440, 0x2088, 0x2c1c, 0x08ac, + 0x3764, 0x1340, 0x1f88, 0x2b1c, 0x07ac, + 0x3664, 0x1240, 0x1e88, 0x2a1c, 0x06ac, + 0x3668, 0x1244, 0x1e8c, 0x2a20, 0x06b0, + 0x3768, 0x1344, 0x1f8c, 0x2b20, 0x07b0, + 0x3868, 0x1444, 0x208c, 0x2c20, 0x08b0, + 0x0048, 0x1824, 0x246c, 0x3000, 0x0c90, + 0x0148, 0x1924, 0x256c, 0x3100, 0x0d90, + 0x0248, 0x1a24, 0x266c, 0x3200, 0x0e90, + 0x024c, 0x1a28, 0x2670, 0x3204, 0x0e94, + 0x014c, 0x1928, 0x2570, 0x3104, 0x0d94, + 0x004c, 0x1828, 0x2470, 0x3004, 0x0c94, + 0x0050, 0x182c, 0x2474, 0x3008, 0x0c98, + 0x0150, 0x192c, 0x2574, 0x3108, 0x0d98, + 0x0250, 0x1a2c, 0x2674, 0x3208, 0x0e98, + 0x0254, 0x1a30, 0x2678, 0x320c, 0x0e9c, + 0x0154, 0x1930, 0x2578, 0x310c, 0x0d9c, + 0x0054, 0x1830, 0x2478, 
0x300c, 0x0c9c, + 0x0058, 0x1834, 0x247c, 0x3010, 0x0ca0, + 0x0158, 0x1934, 0x257c, 0x3110, 0x0da0, + 0x0258, 0x1a34, 0x267c, 0x3210, 0x0ea0, + 0x025c, 0x1a38, 0x2680, 0x3214, 0x0ea4, + 0x015c, 0x1938, 0x2580, 0x3114, 0x0da4, + 0x005c, 0x1838, 0x2480, 0x3014, 0x0ca4, + 0x0060, 0x183c, 0x2484, 0x3018, 0x0ca8, + 0x0160, 0x193c, 0x2584, 0x3118, 0x0da8, + 0x0260, 0x1a3c, 0x2684, 0x3218, 0x0ea8, + 0x0264, 0x1a40, 0x2688, 0x321c, 0x0eac, + 0x0164, 0x1940, 0x2588, 0x311c, 0x0dac, + 0x0064, 0x1840, 0x2488, 0x301c, 0x0cac, + 0x0068, 0x1844, 0x248c, 0x3020, 0x0cb0, + 0x0168, 0x1944, 0x258c, 0x3120, 0x0db0, + 0x0268, 0x1a44, 0x268c, 0x3220, 0x0eb0, + 0x0648, 0x1e24, 0x2a6c, 0x3600, 0x1290, + 0x0748, 0x1f24, 0x2b6c, 0x3700, 0x1390, + 0x0848, 0x2024, 0x2c6c, 0x3800, 0x1490, + 0x084c, 0x2028, 0x2c70, 0x3804, 0x1494, + 0x074c, 0x1f28, 0x2b70, 0x3704, 0x1394, + 0x064c, 0x1e28, 0x2a70, 0x3604, 0x1294, + 0x0650, 0x1e2c, 0x2a74, 0x3608, 0x1298, + 0x0750, 0x1f2c, 0x2b74, 0x3708, 0x1398, + 0x0850, 0x202c, 0x2c74, 0x3808, 0x1498, + 0x0854, 0x2030, 0x2c78, 0x380c, 0x149c, + 0x0754, 0x1f30, 0x2b78, 0x370c, 0x139c, + 0x0654, 0x1e30, 0x2a78, 0x360c, 0x129c, + 0x0658, 0x1e34, 0x2a7c, 0x3610, 0x12a0, + 0x0758, 0x1f34, 0x2b7c, 0x3710, 0x13a0, + 0x0858, 0x2034, 0x2c7c, 0x3810, 0x14a0, + 0x085c, 0x2038, 0x2c80, 0x3814, 0x14a4, + 0x075c, 0x1f38, 0x2b80, 0x3714, 0x13a4, + 0x065c, 0x1e38, 0x2a80, 0x3614, 0x12a4, + 0x0660, 0x1e3c, 0x2a84, 0x3618, 0x12a8, + 0x0760, 0x1f3c, 0x2b84, 0x3718, 0x13a8, + 0x0860, 0x203c, 0x2c84, 0x3818, 0x14a8, + 0x0864, 0x2040, 0x2c88, 0x381c, 0x14ac, + 0x0764, 0x1f40, 0x2b88, 0x371c, 0x13ac, + 0x0664, 0x1e40, 0x2a88, 0x361c, 0x12ac, + 0x0668, 0x1e44, 0x2a8c, 0x3620, 0x12b0, + 0x0768, 0x1f44, 0x2b8c, 0x3720, 0x13b0, + 0x0868, 0x2044, 0x2c8c, 0x3820, 0x14b0, + 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90, + 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90, + 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90, + 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94, + 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94, + 0x0f4c, 
0x2728, 0x3370, 0x0304, 0x1b94, + 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98, + 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98, + 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98, + 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c, + 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c, + 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c, + 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0, + 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0, + 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0, + 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4, + 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4, + 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4, + 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8, + 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8, + 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8, + 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac, + 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac, + 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac, + 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0, + 0x1068, 0x2844, 0x348c, 0x0420, 0x1cb0, + 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0, + 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190, + 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290, + 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390, + 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394, + 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294, + 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194, + 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198, + 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298, + 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398, + 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c, + 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c, + 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c, + 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0, + 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0, + 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0, + 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4, + 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4, + 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4, + 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8, + 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8, + 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8, + 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac, + 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac, + 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac, + 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0, + 0x1668, 0x2e44, 0x3a8c, 0x0a20, 
0x22b0, + 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0, + 0x1b48, 0x3324, 0x036c, 0x0f00, 0x2790, + 0x1c48, 0x3424, 0x046c, 0x1000, 0x2890, + 0x1d48, 0x3524, 0x056c, 0x1100, 0x2990, + 0x1d4c, 0x3528, 0x0570, 0x1104, 0x2994, + 0x1c4c, 0x3428, 0x0470, 0x1004, 0x2894, + 0x1b4c, 0x3328, 0x0370, 0x0f04, 0x2794, + 0x1b50, 0x332c, 0x0374, 0x0f08, 0x2798, + 0x1c50, 0x342c, 0x0474, 0x1008, 0x2898, + 0x1d50, 0x352c, 0x0574, 0x1108, 0x2998, + 0x1d54, 0x3530, 0x0578, 0x110c, 0x299c, + 0x1c54, 0x3430, 0x0478, 0x100c, 0x289c, + 0x1b54, 0x3330, 0x0378, 0x0f0c, 0x279c, + 0x1b58, 0x3334, 0x037c, 0x0f10, 0x27a0, + 0x1c58, 0x3434, 0x047c, 0x1010, 0x28a0, + 0x1d58, 0x3534, 0x057c, 0x1110, 0x29a0, + 0x1d5c, 0x3538, 0x0580, 0x1114, 0x29a4, + 0x1c5c, 0x3438, 0x0480, 0x1014, 0x28a4, + 0x1b5c, 0x3338, 0x0380, 0x0f14, 0x27a4, + 0x1b60, 0x333c, 0x0384, 0x0f18, 0x27a8, + 0x1c60, 0x343c, 0x0484, 0x1018, 0x28a8, + 0x1d60, 0x353c, 0x0584, 0x1118, 0x29a8, + 0x1d64, 0x3540, 0x0588, 0x111c, 0x29ac, + 0x1c64, 0x3440, 0x0488, 0x101c, 0x28ac, + 0x1b64, 0x3340, 0x0388, 0x0f1c, 0x27ac, + 0x1b68, 0x3344, 0x038c, 0x0f20, 0x27b0, + 0x1c68, 0x3444, 0x048c, 0x1020, 0x28b0, + 0x1d68, 0x3544, 0x058c, 0x1120, 0x29b0, + 0x2148, 0x3924, 0x096c, 0x1500, 0x2d90, + 0x2248, 0x3a24, 0x0a6c, 0x1600, 0x2e90, + 0x2348, 0x3b24, 0x0b6c, 0x1700, 0x2f90, + 0x234c, 0x3b28, 0x0b70, 0x1704, 0x2f94, + 0x224c, 0x3a28, 0x0a70, 0x1604, 0x2e94, + 0x214c, 0x3928, 0x0970, 0x1504, 0x2d94, + 0x2150, 0x392c, 0x0974, 0x1508, 0x2d98, + 0x2250, 0x3a2c, 0x0a74, 0x1608, 0x2e98, + 0x2350, 0x3b2c, 0x0b74, 0x1708, 0x2f98, + 0x2354, 0x3b30, 0x0b78, 0x170c, 0x2f9c, + 0x2254, 0x3a30, 0x0a78, 0x160c, 0x2e9c, + 0x2154, 0x3930, 0x0978, 0x150c, 0x2d9c, + 0x2158, 0x3934, 0x097c, 0x1510, 0x2da0, + 0x2258, 0x3a34, 0x0a7c, 0x1610, 0x2ea0, + 0x2358, 0x3b34, 0x0b7c, 0x1710, 0x2fa0, + 0x235c, 0x3b38, 0x0b80, 0x1714, 0x2fa4, + 0x225c, 0x3a38, 0x0a80, 0x1614, 0x2ea4, + 0x215c, 0x3938, 0x0980, 0x1514, 0x2da4, + 0x2160, 0x393c, 0x0984, 0x1518, 0x2da8, + 0x2260, 0x3a3c, 
0x0a84, 0x1618, 0x2ea8, + 0x2360, 0x3b3c, 0x0b84, 0x1718, 0x2fa8, + 0x2364, 0x3b40, 0x0b88, 0x171c, 0x2fac, + 0x2264, 0x3a40, 0x0a88, 0x161c, 0x2eac, + 0x2164, 0x3940, 0x0988, 0x151c, 0x2dac, + 0x2168, 0x3944, 0x098c, 0x1520, 0x2db0, + 0x2268, 0x3a44, 0x0a8c, 0x1620, 0x2eb0, + 0x2368, 0x3b44, 0x0b8c, 0x1720, 0x2fb0, + 0x2748, 0x0324, 0x0f6c, 0x1b00, 0x3390, + 0x2848, 0x0424, 0x106c, 0x1c00, 0x3490, + 0x2948, 0x0524, 0x116c, 0x1d00, 0x3590, + 0x294c, 0x0528, 0x1170, 0x1d04, 0x3594, + 0x284c, 0x0428, 0x1070, 0x1c04, 0x3494, + 0x274c, 0x0328, 0x0f70, 0x1b04, 0x3394, + 0x2750, 0x032c, 0x0f74, 0x1b08, 0x3398, + 0x2850, 0x042c, 0x1074, 0x1c08, 0x3498, + 0x2950, 0x052c, 0x1174, 0x1d08, 0x3598, + 0x2954, 0x0530, 0x1178, 0x1d0c, 0x359c, + 0x2854, 0x0430, 0x1078, 0x1c0c, 0x349c, + 0x2754, 0x0330, 0x0f78, 0x1b0c, 0x339c, + 0x2758, 0x0334, 0x0f7c, 0x1b10, 0x33a0, + 0x2858, 0x0434, 0x107c, 0x1c10, 0x34a0, + 0x2958, 0x0534, 0x117c, 0x1d10, 0x35a0, + 0x295c, 0x0538, 0x1180, 0x1d14, 0x35a4, + 0x285c, 0x0438, 0x1080, 0x1c14, 0x34a4, + 0x275c, 0x0338, 0x0f80, 0x1b14, 0x33a4, + 0x2760, 0x033c, 0x0f84, 0x1b18, 0x33a8, + 0x2860, 0x043c, 0x1084, 0x1c18, 0x34a8, + 0x2960, 0x053c, 0x1184, 0x1d18, 0x35a8, + 0x2964, 0x0540, 0x1188, 0x1d1c, 0x35ac, + 0x2864, 0x0440, 0x1088, 0x1c1c, 0x34ac, + 0x2764, 0x0340, 0x0f88, 0x1b1c, 0x33ac, + 0x2768, 0x0344, 0x0f8c, 0x1b20, 0x33b0, + 0x2868, 0x0444, 0x108c, 0x1c20, 0x34b0, + 0x2968, 0x0544, 0x118c, 0x1d20, 0x35b0, + 0x2d48, 0x0924, 0x156c, 0x2100, 0x3990, + 0x2e48, 0x0a24, 0x166c, 0x2200, 0x3a90, + 0x2f48, 0x0b24, 0x176c, 0x2300, 0x3b90, + 0x2f4c, 0x0b28, 0x1770, 0x2304, 0x3b94, + 0x2e4c, 0x0a28, 0x1670, 0x2204, 0x3a94, + 0x2d4c, 0x0928, 0x1570, 0x2104, 0x3994, + 0x2d50, 0x092c, 0x1574, 0x2108, 0x3998, + 0x2e50, 0x0a2c, 0x1674, 0x2208, 0x3a98, + 0x2f50, 0x0b2c, 0x1774, 0x2308, 0x3b98, + 0x2f54, 0x0b30, 0x1778, 0x230c, 0x3b9c, + 0x2e54, 0x0a30, 0x1678, 0x220c, 0x3a9c, + 0x2d54, 0x0930, 0x1578, 0x210c, 0x399c, + 0x2d58, 0x0934, 0x157c, 0x2110, 0x39a0, + 
0x2e58, 0x0a34, 0x167c, 0x2210, 0x3aa0, + 0x2f58, 0x0b34, 0x177c, 0x2310, 0x3ba0, + 0x2f5c, 0x0b38, 0x1780, 0x2314, 0x3ba4, + 0x2e5c, 0x0a38, 0x1680, 0x2214, 0x3aa4, + 0x2d5c, 0x0938, 0x1580, 0x2114, 0x39a4, + 0x2d60, 0x093c, 0x1584, 0x2118, 0x39a8, + 0x2e60, 0x0a3c, 0x1684, 0x2218, 0x3aa8, + 0x2f60, 0x0b3c, 0x1784, 0x2318, 0x3ba8, + 0x2f64, 0x0b40, 0x1788, 0x231c, 0x3bac, + 0x2e64, 0x0a40, 0x1688, 0x221c, 0x3aac, + 0x2d64, 0x0940, 0x1588, 0x211c, 0x39ac, + 0x2d68, 0x0944, 0x158c, 0x2120, 0x39b0, + 0x2e68, 0x0a44, 0x168c, 0x2220, 0x3ab0, + 0x2f68, 0x0b44, 0x178c, 0x2320, 0x3bb0, + 0x3348, 0x0f24, 0x1b6c, 0x2700, 0x0390, + 0x3448, 0x1024, 0x1c6c, 0x2800, 0x0490, + 0x3548, 0x1124, 0x1d6c, 0x2900, 0x0590, + 0x354c, 0x1128, 0x1d70, 0x2904, 0x0594, + 0x344c, 0x1028, 0x1c70, 0x2804, 0x0494, + 0x334c, 0x0f28, 0x1b70, 0x2704, 0x0394, + 0x3350, 0x0f2c, 0x1b74, 0x2708, 0x0398, + 0x3450, 0x102c, 0x1c74, 0x2808, 0x0498, + 0x3550, 0x112c, 0x1d74, 0x2908, 0x0598, + 0x3554, 0x1130, 0x1d78, 0x290c, 0x059c, + 0x3454, 0x1030, 0x1c78, 0x280c, 0x049c, + 0x3354, 0x0f30, 0x1b78, 0x270c, 0x039c, + 0x3358, 0x0f34, 0x1b7c, 0x2710, 0x03a0, + 0x3458, 0x1034, 0x1c7c, 0x2810, 0x04a0, + 0x3558, 0x1134, 0x1d7c, 0x2910, 0x05a0, + 0x355c, 0x1138, 0x1d80, 0x2914, 0x05a4, + 0x345c, 0x1038, 0x1c80, 0x2814, 0x04a4, + 0x335c, 0x0f38, 0x1b80, 0x2714, 0x03a4, + 0x3360, 0x0f3c, 0x1b84, 0x2718, 0x03a8, + 0x3460, 0x103c, 0x1c84, 0x2818, 0x04a8, + 0x3560, 0x113c, 0x1d84, 0x2918, 0x05a8, + 0x3564, 0x1140, 0x1d88, 0x291c, 0x05ac, + 0x3464, 0x1040, 0x1c88, 0x281c, 0x04ac, + 0x3364, 0x0f40, 0x1b88, 0x271c, 0x03ac, + 0x3368, 0x0f44, 0x1b8c, 0x2720, 0x03b0, + 0x3468, 0x1044, 0x1c8c, 0x2820, 0x04b0, + 0x3568, 0x1144, 0x1d8c, 0x2920, 0x05b0, + 0x3948, 0x1524, 0x216c, 0x2d00, 0x0990, + 0x3a48, 0x1624, 0x226c, 0x2e00, 0x0a90, + 0x3b48, 0x1724, 0x236c, 0x2f00, 0x0b90, + 0x3b4c, 0x1728, 0x2370, 0x2f04, 0x0b94, + 0x3a4c, 0x1628, 0x2270, 0x2e04, 0x0a94, + 0x394c, 0x1528, 0x2170, 0x2d04, 0x0994, + 0x3950, 0x152c, 0x2174, 
0x2d08, 0x0998, + 0x3a50, 0x162c, 0x2274, 0x2e08, 0x0a98, + 0x3b50, 0x172c, 0x2374, 0x2f08, 0x0b98, + 0x3b54, 0x1730, 0x2378, 0x2f0c, 0x0b9c, + 0x3a54, 0x1630, 0x2278, 0x2e0c, 0x0a9c, + 0x3954, 0x1530, 0x2178, 0x2d0c, 0x099c, + 0x3958, 0x1534, 0x217c, 0x2d10, 0x09a0, + 0x3a58, 0x1634, 0x227c, 0x2e10, 0x0aa0, + 0x3b58, 0x1734, 0x237c, 0x2f10, 0x0ba0, + 0x3b5c, 0x1738, 0x2380, 0x2f14, 0x0ba4, + 0x3a5c, 0x1638, 0x2280, 0x2e14, 0x0aa4, + 0x395c, 0x1538, 0x2180, 0x2d14, 0x09a4, + 0x3960, 0x153c, 0x2184, 0x2d18, 0x09a8, + 0x3a60, 0x163c, 0x2284, 0x2e18, 0x0aa8, + 0x3b60, 0x173c, 0x2384, 0x2f18, 0x0ba8, + 0x3b64, 0x1740, 0x2388, 0x2f1c, 0x0bac, + 0x3a64, 0x1640, 0x2288, 0x2e1c, 0x0aac, + 0x3964, 0x1540, 0x2188, 0x2d1c, 0x09ac, + 0x3968, 0x1544, 0x218c, 0x2d20, 0x09b0, + 0x3a68, 0x1644, 0x228c, 0x2e20, 0x0ab0, + 0x3b68, 0x1744, 0x238c, 0x2f20, 0x0bb0, + 0x0348, 0x1b24, 0x276c, 0x3300, 0x0f90, + 0x0448, 0x1c24, 0x286c, 0x3400, 0x1090, + 0x0548, 0x1d24, 0x296c, 0x3500, 0x1190, + 0x054c, 0x1d28, 0x2970, 0x3504, 0x1194, + 0x044c, 0x1c28, 0x2870, 0x3404, 0x1094, + 0x034c, 0x1b28, 0x2770, 0x3304, 0x0f94, + 0x0350, 0x1b2c, 0x2774, 0x3308, 0x0f98, + 0x0450, 0x1c2c, 0x2874, 0x3408, 0x1098, + 0x0550, 0x1d2c, 0x2974, 0x3508, 0x1198, + 0x0554, 0x1d30, 0x2978, 0x350c, 0x119c, + 0x0454, 0x1c30, 0x2878, 0x340c, 0x109c, + 0x0354, 0x1b30, 0x2778, 0x330c, 0x0f9c, + 0x0358, 0x1b34, 0x277c, 0x3310, 0x0fa0, + 0x0458, 0x1c34, 0x287c, 0x3410, 0x10a0, + 0x0558, 0x1d34, 0x297c, 0x3510, 0x11a0, + 0x055c, 0x1d38, 0x2980, 0x3514, 0x11a4, + 0x045c, 0x1c38, 0x2880, 0x3414, 0x10a4, + 0x035c, 0x1b38, 0x2780, 0x3314, 0x0fa4, + 0x0360, 0x1b3c, 0x2784, 0x3318, 0x0fa8, + 0x0460, 0x1c3c, 0x2884, 0x3418, 0x10a8, + 0x0560, 0x1d3c, 0x2984, 0x3518, 0x11a8, + 0x0564, 0x1d40, 0x2988, 0x351c, 0x11ac, + 0x0464, 0x1c40, 0x2888, 0x341c, 0x10ac, + 0x0364, 0x1b40, 0x2788, 0x331c, 0x0fac, + 0x0368, 0x1b44, 0x278c, 0x3320, 0x0fb0, + 0x0468, 0x1c44, 0x288c, 0x3420, 0x10b0, + 0x0568, 0x1d44, 0x298c, 0x3520, 0x11b0, + 0x0948, 
0x2124, 0x2d6c, 0x3900, 0x1590, + 0x0a48, 0x2224, 0x2e6c, 0x3a00, 0x1690, + 0x0b48, 0x2324, 0x2f6c, 0x3b00, 0x1790, + 0x0b4c, 0x2328, 0x2f70, 0x3b04, 0x1794, + 0x0a4c, 0x2228, 0x2e70, 0x3a04, 0x1694, + 0x094c, 0x2128, 0x2d70, 0x3904, 0x1594, + 0x0950, 0x212c, 0x2d74, 0x3908, 0x1598, + 0x0a50, 0x222c, 0x2e74, 0x3a08, 0x1698, + 0x0b50, 0x232c, 0x2f74, 0x3b08, 0x1798, + 0x0b54, 0x2330, 0x2f78, 0x3b0c, 0x179c, + 0x0a54, 0x2230, 0x2e78, 0x3a0c, 0x169c, + 0x0954, 0x2130, 0x2d78, 0x390c, 0x159c, + 0x0958, 0x2134, 0x2d7c, 0x3910, 0x15a0, + 0x0a58, 0x2234, 0x2e7c, 0x3a10, 0x16a0, + 0x0b58, 0x2334, 0x2f7c, 0x3b10, 0x17a0, + 0x0b5c, 0x2338, 0x2f80, 0x3b14, 0x17a4, + 0x0a5c, 0x2238, 0x2e80, 0x3a14, 0x16a4, + 0x095c, 0x2138, 0x2d80, 0x3914, 0x15a4, + 0x0960, 0x213c, 0x2d84, 0x3918, 0x15a8, + 0x0a60, 0x223c, 0x2e84, 0x3a18, 0x16a8, + 0x0b60, 0x233c, 0x2f84, 0x3b18, 0x17a8, + 0x0b64, 0x2340, 0x2f88, 0x3b1c, 0x17ac, + 0x0a64, 0x2240, 0x2e88, 0x3a1c, 0x16ac, + 0x0964, 0x2140, 0x2d88, 0x391c, 0x15ac, + 0x0968, 0x2144, 0x2d8c, 0x3920, 0x15b0, + 0x0a68, 0x2244, 0x2e8c, 0x3a20, 0x16b0, + 0x0b68, 0x2344, 0x2f8c, 0x3b20, 0x17b0, +}; + +/* 2 channels per frame, 12 DIF sequences per channel, + 27 video segments per DIF sequence, 5 macroblocks per video segment */ +static const uint16_t dv_place_422_625[2*12*27*5] = { + 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890, + 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990, + 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90, + 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94, + 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994, + 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894, + 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898, + 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998, + 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98, + 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c, + 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c, + 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c, + 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0, + 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0, + 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0, + 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4, + 0x0d5c, 
0x2538, 0x3180, 0x0114, 0x19a4, + 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4, + 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8, + 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8, + 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8, + 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac, + 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac, + 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac, + 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0, + 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0, + 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0, + 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90, + 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90, + 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090, + 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094, + 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94, + 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94, + 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98, + 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98, + 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098, + 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c, + 0x1354, 0x2b30, 0x3778, 0x070c, 0x1f9c, + 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c, + 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0, + 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0, + 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0, + 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4, + 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4, + 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4, + 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8, + 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8, + 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8, + 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac, + 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac, + 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac, + 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0, + 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0, + 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0, + 0x1848, 0x3024, 0x3c6c, 0x0c00, 0x2490, + 0x1948, 0x3124, 0x3d6c, 0x0d00, 0x2590, + 0x1a48, 0x3224, 0x3e6c, 0x0e00, 0x2690, + 0x1a4c, 0x3228, 0x3e70, 0x0e04, 0x2694, + 0x194c, 0x3128, 0x3d70, 0x0d04, 0x2594, + 0x184c, 0x3028, 0x3c70, 0x0c04, 0x2494, + 0x1850, 0x302c, 0x3c74, 0x0c08, 0x2498, + 0x1950, 0x312c, 0x3d74, 0x0d08, 0x2598, + 0x1a50, 0x322c, 0x3e74, 0x0e08, 0x2698, + 0x1a54, 0x3230, 0x3e78, 0x0e0c, 
0x269c, + 0x1954, 0x3130, 0x3d78, 0x0d0c, 0x259c, + 0x1854, 0x3030, 0x3c78, 0x0c0c, 0x249c, + 0x1858, 0x3034, 0x3c7c, 0x0c10, 0x24a0, + 0x1958, 0x3134, 0x3d7c, 0x0d10, 0x25a0, + 0x1a58, 0x3234, 0x3e7c, 0x0e10, 0x26a0, + 0x1a5c, 0x3238, 0x3e80, 0x0e14, 0x26a4, + 0x195c, 0x3138, 0x3d80, 0x0d14, 0x25a4, + 0x185c, 0x3038, 0x3c80, 0x0c14, 0x24a4, + 0x1860, 0x303c, 0x3c84, 0x0c18, 0x24a8, + 0x1960, 0x313c, 0x3d84, 0x0d18, 0x25a8, + 0x1a60, 0x323c, 0x3e84, 0x0e18, 0x26a8, + 0x1a64, 0x3240, 0x3e88, 0x0e1c, 0x26ac, + 0x1964, 0x3140, 0x3d88, 0x0d1c, 0x25ac, + 0x1864, 0x3040, 0x3c88, 0x0c1c, 0x24ac, + 0x1868, 0x3044, 0x3c8c, 0x0c20, 0x24b0, + 0x1968, 0x3144, 0x3d8c, 0x0d20, 0x25b0, + 0x1a68, 0x3244, 0x3e8c, 0x0e20, 0x26b0, + 0x1e48, 0x3624, 0x426c, 0x1200, 0x2a90, + 0x1f48, 0x3724, 0x436c, 0x1300, 0x2b90, + 0x2048, 0x3824, 0x446c, 0x1400, 0x2c90, + 0x204c, 0x3828, 0x4470, 0x1404, 0x2c94, + 0x1f4c, 0x3728, 0x4370, 0x1304, 0x2b94, + 0x1e4c, 0x3628, 0x4270, 0x1204, 0x2a94, + 0x1e50, 0x362c, 0x4274, 0x1208, 0x2a98, + 0x1f50, 0x372c, 0x4374, 0x1308, 0x2b98, + 0x2050, 0x382c, 0x4474, 0x1408, 0x2c98, + 0x2054, 0x3830, 0x4478, 0x140c, 0x2c9c, + 0x1f54, 0x3730, 0x4378, 0x130c, 0x2b9c, + 0x1e54, 0x3630, 0x4278, 0x120c, 0x2a9c, + 0x1e58, 0x3634, 0x427c, 0x1210, 0x2aa0, + 0x1f58, 0x3734, 0x437c, 0x1310, 0x2ba0, + 0x2058, 0x3834, 0x447c, 0x1410, 0x2ca0, + 0x205c, 0x3838, 0x4480, 0x1414, 0x2ca4, + 0x1f5c, 0x3738, 0x4380, 0x1314, 0x2ba4, + 0x1e5c, 0x3638, 0x4280, 0x1214, 0x2aa4, + 0x1e60, 0x363c, 0x4284, 0x1218, 0x2aa8, + 0x1f60, 0x373c, 0x4384, 0x1318, 0x2ba8, + 0x2060, 0x383c, 0x4484, 0x1418, 0x2ca8, + 0x2064, 0x3840, 0x4488, 0x141c, 0x2cac, + 0x1f64, 0x3740, 0x4388, 0x131c, 0x2bac, + 0x1e64, 0x3640, 0x4288, 0x121c, 0x2aac, + 0x1e68, 0x3644, 0x428c, 0x1220, 0x2ab0, + 0x1f68, 0x3744, 0x438c, 0x1320, 0x2bb0, + 0x2068, 0x3844, 0x448c, 0x1420, 0x2cb0, + 0x2448, 0x3c24, 0x006c, 0x1800, 0x3090, + 0x2548, 0x3d24, 0x016c, 0x1900, 0x3190, + 0x2648, 0x3e24, 0x026c, 0x1a00, 0x3290, + 0x264c, 0x3e28, 
0x0270, 0x1a04, 0x3294, + 0x254c, 0x3d28, 0x0170, 0x1904, 0x3194, + 0x244c, 0x3c28, 0x0070, 0x1804, 0x3094, + 0x2450, 0x3c2c, 0x0074, 0x1808, 0x3098, + 0x2550, 0x3d2c, 0x0174, 0x1908, 0x3198, + 0x2650, 0x3e2c, 0x0274, 0x1a08, 0x3298, + 0x2654, 0x3e30, 0x0278, 0x1a0c, 0x329c, + 0x2554, 0x3d30, 0x0178, 0x190c, 0x319c, + 0x2454, 0x3c30, 0x0078, 0x180c, 0x309c, + 0x2458, 0x3c34, 0x007c, 0x1810, 0x30a0, + 0x2558, 0x3d34, 0x017c, 0x1910, 0x31a0, + 0x2658, 0x3e34, 0x027c, 0x1a10, 0x32a0, + 0x265c, 0x3e38, 0x0280, 0x1a14, 0x32a4, + 0x255c, 0x3d38, 0x0180, 0x1914, 0x31a4, + 0x245c, 0x3c38, 0x0080, 0x1814, 0x30a4, + 0x2460, 0x3c3c, 0x0084, 0x1818, 0x30a8, + 0x2560, 0x3d3c, 0x0184, 0x1918, 0x31a8, + 0x2660, 0x3e3c, 0x0284, 0x1a18, 0x32a8, + 0x2664, 0x3e40, 0x0288, 0x1a1c, 0x32ac, + 0x2564, 0x3d40, 0x0188, 0x191c, 0x31ac, + 0x2464, 0x3c40, 0x0088, 0x181c, 0x30ac, + 0x2468, 0x3c44, 0x008c, 0x1820, 0x30b0, + 0x2568, 0x3d44, 0x018c, 0x1920, 0x31b0, + 0x2668, 0x3e44, 0x028c, 0x1a20, 0x32b0, + 0x2a48, 0x4224, 0x066c, 0x1e00, 0x3690, + 0x2b48, 0x4324, 0x076c, 0x1f00, 0x3790, + 0x2c48, 0x4424, 0x086c, 0x2000, 0x3890, + 0x2c4c, 0x4428, 0x0870, 0x2004, 0x3894, + 0x2b4c, 0x4328, 0x0770, 0x1f04, 0x3794, + 0x2a4c, 0x4228, 0x0670, 0x1e04, 0x3694, + 0x2a50, 0x422c, 0x0674, 0x1e08, 0x3698, + 0x2b50, 0x432c, 0x0774, 0x1f08, 0x3798, + 0x2c50, 0x442c, 0x0874, 0x2008, 0x3898, + 0x2c54, 0x4430, 0x0878, 0x200c, 0x389c, + 0x2b54, 0x4330, 0x0778, 0x1f0c, 0x379c, + 0x2a54, 0x4230, 0x0678, 0x1e0c, 0x369c, + 0x2a58, 0x4234, 0x067c, 0x1e10, 0x36a0, + 0x2b58, 0x4334, 0x077c, 0x1f10, 0x37a0, + 0x2c58, 0x4434, 0x087c, 0x2010, 0x38a0, + 0x2c5c, 0x4438, 0x0880, 0x2014, 0x38a4, + 0x2b5c, 0x4338, 0x0780, 0x1f14, 0x37a4, + 0x2a5c, 0x4238, 0x0680, 0x1e14, 0x36a4, + 0x2a60, 0x423c, 0x0684, 0x1e18, 0x36a8, + 0x2b60, 0x433c, 0x0784, 0x1f18, 0x37a8, + 0x2c60, 0x443c, 0x0884, 0x2018, 0x38a8, + 0x2c64, 0x4440, 0x0888, 0x201c, 0x38ac, + 0x2b64, 0x4340, 0x0788, 0x1f1c, 0x37ac, + 0x2a64, 0x4240, 0x0688, 0x1e1c, 0x36ac, + 
0x2a68, 0x4244, 0x068c, 0x1e20, 0x36b0, + 0x2b68, 0x4344, 0x078c, 0x1f20, 0x37b0, + 0x2c68, 0x4444, 0x088c, 0x2020, 0x38b0, + 0x3048, 0x0024, 0x0c6c, 0x2400, 0x3c90, + 0x3148, 0x0124, 0x0d6c, 0x2500, 0x3d90, + 0x3248, 0x0224, 0x0e6c, 0x2600, 0x3e90, + 0x324c, 0x0228, 0x0e70, 0x2604, 0x3e94, + 0x314c, 0x0128, 0x0d70, 0x2504, 0x3d94, + 0x304c, 0x0028, 0x0c70, 0x2404, 0x3c94, + 0x3050, 0x002c, 0x0c74, 0x2408, 0x3c98, + 0x3150, 0x012c, 0x0d74, 0x2508, 0x3d98, + 0x3250, 0x022c, 0x0e74, 0x2608, 0x3e98, + 0x3254, 0x0230, 0x0e78, 0x260c, 0x3e9c, + 0x3154, 0x0130, 0x0d78, 0x250c, 0x3d9c, + 0x3054, 0x0030, 0x0c78, 0x240c, 0x3c9c, + 0x3058, 0x0034, 0x0c7c, 0x2410, 0x3ca0, + 0x3158, 0x0134, 0x0d7c, 0x2510, 0x3da0, + 0x3258, 0x0234, 0x0e7c, 0x2610, 0x3ea0, + 0x325c, 0x0238, 0x0e80, 0x2614, 0x3ea4, + 0x315c, 0x0138, 0x0d80, 0x2514, 0x3da4, + 0x305c, 0x0038, 0x0c80, 0x2414, 0x3ca4, + 0x3060, 0x003c, 0x0c84, 0x2418, 0x3ca8, + 0x3160, 0x013c, 0x0d84, 0x2518, 0x3da8, + 0x3260, 0x023c, 0x0e84, 0x2618, 0x3ea8, + 0x3264, 0x0240, 0x0e88, 0x261c, 0x3eac, + 0x3164, 0x0140, 0x0d88, 0x251c, 0x3dac, + 0x3064, 0x0040, 0x0c88, 0x241c, 0x3cac, + 0x3068, 0x0044, 0x0c8c, 0x2420, 0x3cb0, + 0x3168, 0x0144, 0x0d8c, 0x2520, 0x3db0, + 0x3268, 0x0244, 0x0e8c, 0x2620, 0x3eb0, + 0x3648, 0x0624, 0x126c, 0x2a00, 0x4290, + 0x3748, 0x0724, 0x136c, 0x2b00, 0x4390, + 0x3848, 0x0824, 0x146c, 0x2c00, 0x4490, + 0x384c, 0x0828, 0x1470, 0x2c04, 0x4494, + 0x374c, 0x0728, 0x1370, 0x2b04, 0x4394, + 0x364c, 0x0628, 0x1270, 0x2a04, 0x4294, + 0x3650, 0x062c, 0x1274, 0x2a08, 0x4298, + 0x3750, 0x072c, 0x1374, 0x2b08, 0x4398, + 0x3850, 0x082c, 0x1474, 0x2c08, 0x4498, + 0x3854, 0x0830, 0x1478, 0x2c0c, 0x449c, + 0x3754, 0x0730, 0x1378, 0x2b0c, 0x439c, + 0x3654, 0x0630, 0x1278, 0x2a0c, 0x429c, + 0x3658, 0x0634, 0x127c, 0x2a10, 0x42a0, + 0x3758, 0x0734, 0x137c, 0x2b10, 0x43a0, + 0x3858, 0x0834, 0x147c, 0x2c10, 0x44a0, + 0x385c, 0x0838, 0x1480, 0x2c14, 0x44a4, + 0x375c, 0x0738, 0x1380, 0x2b14, 0x43a4, + 0x365c, 0x0638, 0x1280, 
0x2a14, 0x42a4, + 0x3660, 0x063c, 0x1284, 0x2a18, 0x42a8, + 0x3760, 0x073c, 0x1384, 0x2b18, 0x43a8, + 0x3860, 0x083c, 0x1484, 0x2c18, 0x44a8, + 0x3864, 0x0840, 0x1488, 0x2c1c, 0x44ac, + 0x3764, 0x0740, 0x1388, 0x2b1c, 0x43ac, + 0x3664, 0x0640, 0x1288, 0x2a1c, 0x42ac, + 0x3668, 0x0644, 0x128c, 0x2a20, 0x42b0, + 0x3768, 0x0744, 0x138c, 0x2b20, 0x43b0, + 0x3868, 0x0844, 0x148c, 0x2c20, 0x44b0, + 0x3c48, 0x0c24, 0x186c, 0x3000, 0x0090, + 0x3d48, 0x0d24, 0x196c, 0x3100, 0x0190, + 0x3e48, 0x0e24, 0x1a6c, 0x3200, 0x0290, + 0x3e4c, 0x0e28, 0x1a70, 0x3204, 0x0294, + 0x3d4c, 0x0d28, 0x1970, 0x3104, 0x0194, + 0x3c4c, 0x0c28, 0x1870, 0x3004, 0x0094, + 0x3c50, 0x0c2c, 0x1874, 0x3008, 0x0098, + 0x3d50, 0x0d2c, 0x1974, 0x3108, 0x0198, + 0x3e50, 0x0e2c, 0x1a74, 0x3208, 0x0298, + 0x3e54, 0x0e30, 0x1a78, 0x320c, 0x029c, + 0x3d54, 0x0d30, 0x1978, 0x310c, 0x019c, + 0x3c54, 0x0c30, 0x1878, 0x300c, 0x009c, + 0x3c58, 0x0c34, 0x187c, 0x3010, 0x00a0, + 0x3d58, 0x0d34, 0x197c, 0x3110, 0x01a0, + 0x3e58, 0x0e34, 0x1a7c, 0x3210, 0x02a0, + 0x3e5c, 0x0e38, 0x1a80, 0x3214, 0x02a4, + 0x3d5c, 0x0d38, 0x1980, 0x3114, 0x01a4, + 0x3c5c, 0x0c38, 0x1880, 0x3014, 0x00a4, + 0x3c60, 0x0c3c, 0x1884, 0x3018, 0x00a8, + 0x3d60, 0x0d3c, 0x1984, 0x3118, 0x01a8, + 0x3e60, 0x0e3c, 0x1a84, 0x3218, 0x02a8, + 0x3e64, 0x0e40, 0x1a88, 0x321c, 0x02ac, + 0x3d64, 0x0d40, 0x1988, 0x311c, 0x01ac, + 0x3c64, 0x0c40, 0x1888, 0x301c, 0x00ac, + 0x3c68, 0x0c44, 0x188c, 0x3020, 0x00b0, + 0x3d68, 0x0d44, 0x198c, 0x3120, 0x01b0, + 0x3e68, 0x0e44, 0x1a8c, 0x3220, 0x02b0, + 0x4248, 0x1224, 0x1e6c, 0x3600, 0x0690, + 0x4348, 0x1324, 0x1f6c, 0x3700, 0x0790, + 0x4448, 0x1424, 0x206c, 0x3800, 0x0890, + 0x444c, 0x1428, 0x2070, 0x3804, 0x0894, + 0x434c, 0x1328, 0x1f70, 0x3704, 0x0794, + 0x424c, 0x1228, 0x1e70, 0x3604, 0x0694, + 0x4250, 0x122c, 0x1e74, 0x3608, 0x0698, + 0x4350, 0x132c, 0x1f74, 0x3708, 0x0798, + 0x4450, 0x142c, 0x2074, 0x3808, 0x0898, + 0x4454, 0x1430, 0x2078, 0x380c, 0x089c, + 0x4354, 0x1330, 0x1f78, 0x370c, 0x079c, + 0x4254, 
0x1230, 0x1e78, 0x360c, 0x069c, + 0x4258, 0x1234, 0x1e7c, 0x3610, 0x06a0, + 0x4358, 0x1334, 0x1f7c, 0x3710, 0x07a0, + 0x4458, 0x1434, 0x207c, 0x3810, 0x08a0, + 0x445c, 0x1438, 0x2080, 0x3814, 0x08a4, + 0x435c, 0x1338, 0x1f80, 0x3714, 0x07a4, + 0x425c, 0x1238, 0x1e80, 0x3614, 0x06a4, + 0x4260, 0x123c, 0x1e84, 0x3618, 0x06a8, + 0x4360, 0x133c, 0x1f84, 0x3718, 0x07a8, + 0x4460, 0x143c, 0x2084, 0x3818, 0x08a8, + 0x4464, 0x1440, 0x2088, 0x381c, 0x08ac, + 0x4364, 0x1340, 0x1f88, 0x371c, 0x07ac, + 0x4264, 0x1240, 0x1e88, 0x361c, 0x06ac, + 0x4268, 0x1244, 0x1e8c, 0x3620, 0x06b0, + 0x4368, 0x1344, 0x1f8c, 0x3720, 0x07b0, + 0x4468, 0x1444, 0x208c, 0x3820, 0x08b0, + 0x0048, 0x1824, 0x246c, 0x3c00, 0x0c90, + 0x0148, 0x1924, 0x256c, 0x3d00, 0x0d90, + 0x0248, 0x1a24, 0x266c, 0x3e00, 0x0e90, + 0x024c, 0x1a28, 0x2670, 0x3e04, 0x0e94, + 0x014c, 0x1928, 0x2570, 0x3d04, 0x0d94, + 0x004c, 0x1828, 0x2470, 0x3c04, 0x0c94, + 0x0050, 0x182c, 0x2474, 0x3c08, 0x0c98, + 0x0150, 0x192c, 0x2574, 0x3d08, 0x0d98, + 0x0250, 0x1a2c, 0x2674, 0x3e08, 0x0e98, + 0x0254, 0x1a30, 0x2678, 0x3e0c, 0x0e9c, + 0x0154, 0x1930, 0x2578, 0x3d0c, 0x0d9c, + 0x0054, 0x1830, 0x2478, 0x3c0c, 0x0c9c, + 0x0058, 0x1834, 0x247c, 0x3c10, 0x0ca0, + 0x0158, 0x1934, 0x257c, 0x3d10, 0x0da0, + 0x0258, 0x1a34, 0x267c, 0x3e10, 0x0ea0, + 0x025c, 0x1a38, 0x2680, 0x3e14, 0x0ea4, + 0x015c, 0x1938, 0x2580, 0x3d14, 0x0da4, + 0x005c, 0x1838, 0x2480, 0x3c14, 0x0ca4, + 0x0060, 0x183c, 0x2484, 0x3c18, 0x0ca8, + 0x0160, 0x193c, 0x2584, 0x3d18, 0x0da8, + 0x0260, 0x1a3c, 0x2684, 0x3e18, 0x0ea8, + 0x0264, 0x1a40, 0x2688, 0x3e1c, 0x0eac, + 0x0164, 0x1940, 0x2588, 0x3d1c, 0x0dac, + 0x0064, 0x1840, 0x2488, 0x3c1c, 0x0cac, + 0x0068, 0x1844, 0x248c, 0x3c20, 0x0cb0, + 0x0168, 0x1944, 0x258c, 0x3d20, 0x0db0, + 0x0268, 0x1a44, 0x268c, 0x3e20, 0x0eb0, + 0x0648, 0x1e24, 0x2a6c, 0x4200, 0x1290, + 0x0748, 0x1f24, 0x2b6c, 0x4300, 0x1390, + 0x0848, 0x2024, 0x2c6c, 0x4400, 0x1490, + 0x084c, 0x2028, 0x2c70, 0x4404, 0x1494, + 0x074c, 0x1f28, 0x2b70, 0x4304, 
0x1394, + 0x064c, 0x1e28, 0x2a70, 0x4204, 0x1294, + 0x0650, 0x1e2c, 0x2a74, 0x4208, 0x1298, + 0x0750, 0x1f2c, 0x2b74, 0x4308, 0x1398, + 0x0850, 0x202c, 0x2c74, 0x4408, 0x1498, + 0x0854, 0x2030, 0x2c78, 0x440c, 0x149c, + 0x0754, 0x1f30, 0x2b78, 0x430c, 0x139c, + 0x0654, 0x1e30, 0x2a78, 0x420c, 0x129c, + 0x0658, 0x1e34, 0x2a7c, 0x4210, 0x12a0, + 0x0758, 0x1f34, 0x2b7c, 0x4310, 0x13a0, + 0x0858, 0x2034, 0x2c7c, 0x4410, 0x14a0, + 0x085c, 0x2038, 0x2c80, 0x4414, 0x14a4, + 0x075c, 0x1f38, 0x2b80, 0x4314, 0x13a4, + 0x065c, 0x1e38, 0x2a80, 0x4214, 0x12a4, + 0x0660, 0x1e3c, 0x2a84, 0x4218, 0x12a8, + 0x0760, 0x1f3c, 0x2b84, 0x4318, 0x13a8, + 0x0860, 0x203c, 0x2c84, 0x4418, 0x14a8, + 0x0864, 0x2040, 0x2c88, 0x441c, 0x14ac, + 0x0764, 0x1f40, 0x2b88, 0x431c, 0x13ac, + 0x0664, 0x1e40, 0x2a88, 0x421c, 0x12ac, + 0x0668, 0x1e44, 0x2a8c, 0x4220, 0x12b0, + 0x0768, 0x1f44, 0x2b8c, 0x4320, 0x13b0, + 0x0868, 0x2044, 0x2c8c, 0x4420, 0x14b0, + 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90, + 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90, + 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90, + 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94, + 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94, + 0x0f4c, 0x2728, 0x3370, 0x0304, 0x1b94, + 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98, + 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98, + 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98, + 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c, + 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c, + 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c, + 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0, + 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0, + 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0, + 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4, + 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4, + 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4, + 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8, + 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8, + 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8, + 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac, + 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac, + 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac, + 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0, + 0x1068, 0x2844, 
0x348c, 0x0420, 0x1cb0, + 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0, + 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190, + 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290, + 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390, + 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394, + 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294, + 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194, + 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198, + 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298, + 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398, + 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c, + 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c, + 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c, + 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0, + 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0, + 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0, + 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4, + 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4, + 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4, + 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8, + 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8, + 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8, + 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac, + 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac, + 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac, + 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0, + 0x1668, 0x2e44, 0x3a8c, 0x0a20, 0x22b0, + 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0, + 0x1b48, 0x3324, 0x3f6c, 0x0f00, 0x2790, + 0x1c48, 0x3424, 0x406c, 0x1000, 0x2890, + 0x1d48, 0x3524, 0x416c, 0x1100, 0x2990, + 0x1d4c, 0x3528, 0x4170, 0x1104, 0x2994, + 0x1c4c, 0x3428, 0x4070, 0x1004, 0x2894, + 0x1b4c, 0x3328, 0x3f70, 0x0f04, 0x2794, + 0x1b50, 0x332c, 0x3f74, 0x0f08, 0x2798, + 0x1c50, 0x342c, 0x4074, 0x1008, 0x2898, + 0x1d50, 0x352c, 0x4174, 0x1108, 0x2998, + 0x1d54, 0x3530, 0x4178, 0x110c, 0x299c, + 0x1c54, 0x3430, 0x4078, 0x100c, 0x289c, + 0x1b54, 0x3330, 0x3f78, 0x0f0c, 0x279c, + 0x1b58, 0x3334, 0x3f7c, 0x0f10, 0x27a0, + 0x1c58, 0x3434, 0x407c, 0x1010, 0x28a0, + 0x1d58, 0x3534, 0x417c, 0x1110, 0x29a0, + 0x1d5c, 0x3538, 0x4180, 0x1114, 0x29a4, + 0x1c5c, 0x3438, 0x4080, 0x1014, 0x28a4, + 0x1b5c, 0x3338, 0x3f80, 0x0f14, 0x27a4, + 0x1b60, 0x333c, 0x3f84, 0x0f18, 0x27a8, + 
0x1c60, 0x343c, 0x4084, 0x1018, 0x28a8, + 0x1d60, 0x353c, 0x4184, 0x1118, 0x29a8, + 0x1d64, 0x3540, 0x4188, 0x111c, 0x29ac, + 0x1c64, 0x3440, 0x4088, 0x101c, 0x28ac, + 0x1b64, 0x3340, 0x3f88, 0x0f1c, 0x27ac, + 0x1b68, 0x3344, 0x3f8c, 0x0f20, 0x27b0, + 0x1c68, 0x3444, 0x408c, 0x1020, 0x28b0, + 0x1d68, 0x3544, 0x418c, 0x1120, 0x29b0, + 0x2148, 0x3924, 0x456c, 0x1500, 0x2d90, + 0x2248, 0x3a24, 0x466c, 0x1600, 0x2e90, + 0x2348, 0x3b24, 0x476c, 0x1700, 0x2f90, + 0x234c, 0x3b28, 0x4770, 0x1704, 0x2f94, + 0x224c, 0x3a28, 0x4670, 0x1604, 0x2e94, + 0x214c, 0x3928, 0x4570, 0x1504, 0x2d94, + 0x2150, 0x392c, 0x4574, 0x1508, 0x2d98, + 0x2250, 0x3a2c, 0x4674, 0x1608, 0x2e98, + 0x2350, 0x3b2c, 0x4774, 0x1708, 0x2f98, + 0x2354, 0x3b30, 0x4778, 0x170c, 0x2f9c, + 0x2254, 0x3a30, 0x4678, 0x160c, 0x2e9c, + 0x2154, 0x3930, 0x4578, 0x150c, 0x2d9c, + 0x2158, 0x3934, 0x457c, 0x1510, 0x2da0, + 0x2258, 0x3a34, 0x467c, 0x1610, 0x2ea0, + 0x2358, 0x3b34, 0x477c, 0x1710, 0x2fa0, + 0x235c, 0x3b38, 0x4780, 0x1714, 0x2fa4, + 0x225c, 0x3a38, 0x4680, 0x1614, 0x2ea4, + 0x215c, 0x3938, 0x4580, 0x1514, 0x2da4, + 0x2160, 0x393c, 0x4584, 0x1518, 0x2da8, + 0x2260, 0x3a3c, 0x4684, 0x1618, 0x2ea8, + 0x2360, 0x3b3c, 0x4784, 0x1718, 0x2fa8, + 0x2364, 0x3b40, 0x4788, 0x171c, 0x2fac, + 0x2264, 0x3a40, 0x4688, 0x161c, 0x2eac, + 0x2164, 0x3940, 0x4588, 0x151c, 0x2dac, + 0x2168, 0x3944, 0x458c, 0x1520, 0x2db0, + 0x2268, 0x3a44, 0x468c, 0x1620, 0x2eb0, + 0x2368, 0x3b44, 0x478c, 0x1720, 0x2fb0, + 0x2748, 0x3f24, 0x036c, 0x1b00, 0x3390, + 0x2848, 0x4024, 0x046c, 0x1c00, 0x3490, + 0x2948, 0x4124, 0x056c, 0x1d00, 0x3590, + 0x294c, 0x4128, 0x0570, 0x1d04, 0x3594, + 0x284c, 0x4028, 0x0470, 0x1c04, 0x3494, + 0x274c, 0x3f28, 0x0370, 0x1b04, 0x3394, + 0x2750, 0x3f2c, 0x0374, 0x1b08, 0x3398, + 0x2850, 0x402c, 0x0474, 0x1c08, 0x3498, + 0x2950, 0x412c, 0x0574, 0x1d08, 0x3598, + 0x2954, 0x4130, 0x0578, 0x1d0c, 0x359c, + 0x2854, 0x4030, 0x0478, 0x1c0c, 0x349c, + 0x2754, 0x3f30, 0x0378, 0x1b0c, 0x339c, + 0x2758, 0x3f34, 0x037c, 
0x1b10, 0x33a0, + 0x2858, 0x4034, 0x047c, 0x1c10, 0x34a0, + 0x2958, 0x4134, 0x057c, 0x1d10, 0x35a0, + 0x295c, 0x4138, 0x0580, 0x1d14, 0x35a4, + 0x285c, 0x4038, 0x0480, 0x1c14, 0x34a4, + 0x275c, 0x3f38, 0x0380, 0x1b14, 0x33a4, + 0x2760, 0x3f3c, 0x0384, 0x1b18, 0x33a8, + 0x2860, 0x403c, 0x0484, 0x1c18, 0x34a8, + 0x2960, 0x413c, 0x0584, 0x1d18, 0x35a8, + 0x2964, 0x4140, 0x0588, 0x1d1c, 0x35ac, + 0x2864, 0x4040, 0x0488, 0x1c1c, 0x34ac, + 0x2764, 0x3f40, 0x0388, 0x1b1c, 0x33ac, + 0x2768, 0x3f44, 0x038c, 0x1b20, 0x33b0, + 0x2868, 0x4044, 0x048c, 0x1c20, 0x34b0, + 0x2968, 0x4144, 0x058c, 0x1d20, 0x35b0, + 0x2d48, 0x4524, 0x096c, 0x2100, 0x3990, + 0x2e48, 0x4624, 0x0a6c, 0x2200, 0x3a90, + 0x2f48, 0x4724, 0x0b6c, 0x2300, 0x3b90, + 0x2f4c, 0x4728, 0x0b70, 0x2304, 0x3b94, + 0x2e4c, 0x4628, 0x0a70, 0x2204, 0x3a94, + 0x2d4c, 0x4528, 0x0970, 0x2104, 0x3994, + 0x2d50, 0x452c, 0x0974, 0x2108, 0x3998, + 0x2e50, 0x462c, 0x0a74, 0x2208, 0x3a98, + 0x2f50, 0x472c, 0x0b74, 0x2308, 0x3b98, + 0x2f54, 0x4730, 0x0b78, 0x230c, 0x3b9c, + 0x2e54, 0x4630, 0x0a78, 0x220c, 0x3a9c, + 0x2d54, 0x4530, 0x0978, 0x210c, 0x399c, + 0x2d58, 0x4534, 0x097c, 0x2110, 0x39a0, + 0x2e58, 0x4634, 0x0a7c, 0x2210, 0x3aa0, + 0x2f58, 0x4734, 0x0b7c, 0x2310, 0x3ba0, + 0x2f5c, 0x4738, 0x0b80, 0x2314, 0x3ba4, + 0x2e5c, 0x4638, 0x0a80, 0x2214, 0x3aa4, + 0x2d5c, 0x4538, 0x0980, 0x2114, 0x39a4, + 0x2d60, 0x453c, 0x0984, 0x2118, 0x39a8, + 0x2e60, 0x463c, 0x0a84, 0x2218, 0x3aa8, + 0x2f60, 0x473c, 0x0b84, 0x2318, 0x3ba8, + 0x2f64, 0x4740, 0x0b88, 0x231c, 0x3bac, + 0x2e64, 0x4640, 0x0a88, 0x221c, 0x3aac, + 0x2d64, 0x4540, 0x0988, 0x211c, 0x39ac, + 0x2d68, 0x4544, 0x098c, 0x2120, 0x39b0, + 0x2e68, 0x4644, 0x0a8c, 0x2220, 0x3ab0, + 0x2f68, 0x4744, 0x0b8c, 0x2320, 0x3bb0, + 0x3348, 0x0324, 0x0f6c, 0x2700, 0x3f90, + 0x3448, 0x0424, 0x106c, 0x2800, 0x4090, + 0x3548, 0x0524, 0x116c, 0x2900, 0x4190, + 0x354c, 0x0528, 0x1170, 0x2904, 0x4194, + 0x344c, 0x0428, 0x1070, 0x2804, 0x4094, + 0x334c, 0x0328, 0x0f70, 0x2704, 0x3f94, + 0x3350, 
0x032c, 0x0f74, 0x2708, 0x3f98, + 0x3450, 0x042c, 0x1074, 0x2808, 0x4098, + 0x3550, 0x052c, 0x1174, 0x2908, 0x4198, + 0x3554, 0x0530, 0x1178, 0x290c, 0x419c, + 0x3454, 0x0430, 0x1078, 0x280c, 0x409c, + 0x3354, 0x0330, 0x0f78, 0x270c, 0x3f9c, + 0x3358, 0x0334, 0x0f7c, 0x2710, 0x3fa0, + 0x3458, 0x0434, 0x107c, 0x2810, 0x40a0, + 0x3558, 0x0534, 0x117c, 0x2910, 0x41a0, + 0x355c, 0x0538, 0x1180, 0x2914, 0x41a4, + 0x345c, 0x0438, 0x1080, 0x2814, 0x40a4, + 0x335c, 0x0338, 0x0f80, 0x2714, 0x3fa4, + 0x3360, 0x033c, 0x0f84, 0x2718, 0x3fa8, + 0x3460, 0x043c, 0x1084, 0x2818, 0x40a8, + 0x3560, 0x053c, 0x1184, 0x2918, 0x41a8, + 0x3564, 0x0540, 0x1188, 0x291c, 0x41ac, + 0x3464, 0x0440, 0x1088, 0x281c, 0x40ac, + 0x3364, 0x0340, 0x0f88, 0x271c, 0x3fac, + 0x3368, 0x0344, 0x0f8c, 0x2720, 0x3fb0, + 0x3468, 0x0444, 0x108c, 0x2820, 0x40b0, + 0x3568, 0x0544, 0x118c, 0x2920, 0x41b0, + 0x3948, 0x0924, 0x156c, 0x2d00, 0x4590, + 0x3a48, 0x0a24, 0x166c, 0x2e00, 0x4690, + 0x3b48, 0x0b24, 0x176c, 0x2f00, 0x4790, + 0x3b4c, 0x0b28, 0x1770, 0x2f04, 0x4794, + 0x3a4c, 0x0a28, 0x1670, 0x2e04, 0x4694, + 0x394c, 0x0928, 0x1570, 0x2d04, 0x4594, + 0x3950, 0x092c, 0x1574, 0x2d08, 0x4598, + 0x3a50, 0x0a2c, 0x1674, 0x2e08, 0x4698, + 0x3b50, 0x0b2c, 0x1774, 0x2f08, 0x4798, + 0x3b54, 0x0b30, 0x1778, 0x2f0c, 0x479c, + 0x3a54, 0x0a30, 0x1678, 0x2e0c, 0x469c, + 0x3954, 0x0930, 0x1578, 0x2d0c, 0x459c, + 0x3958, 0x0934, 0x157c, 0x2d10, 0x45a0, + 0x3a58, 0x0a34, 0x167c, 0x2e10, 0x46a0, + 0x3b58, 0x0b34, 0x177c, 0x2f10, 0x47a0, + 0x3b5c, 0x0b38, 0x1780, 0x2f14, 0x47a4, + 0x3a5c, 0x0a38, 0x1680, 0x2e14, 0x46a4, + 0x395c, 0x0938, 0x1580, 0x2d14, 0x45a4, + 0x3960, 0x093c, 0x1584, 0x2d18, 0x45a8, + 0x3a60, 0x0a3c, 0x1684, 0x2e18, 0x46a8, + 0x3b60, 0x0b3c, 0x1784, 0x2f18, 0x47a8, + 0x3b64, 0x0b40, 0x1788, 0x2f1c, 0x47ac, + 0x3a64, 0x0a40, 0x1688, 0x2e1c, 0x46ac, + 0x3964, 0x0940, 0x1588, 0x2d1c, 0x45ac, + 0x3968, 0x0944, 0x158c, 0x2d20, 0x45b0, + 0x3a68, 0x0a44, 0x168c, 0x2e20, 0x46b0, + 0x3b68, 0x0b44, 0x178c, 0x2f20, 
0x47b0, + 0x3f48, 0x0f24, 0x1b6c, 0x3300, 0x0390, + 0x4048, 0x1024, 0x1c6c, 0x3400, 0x0490, + 0x4148, 0x1124, 0x1d6c, 0x3500, 0x0590, + 0x414c, 0x1128, 0x1d70, 0x3504, 0x0594, + 0x404c, 0x1028, 0x1c70, 0x3404, 0x0494, + 0x3f4c, 0x0f28, 0x1b70, 0x3304, 0x0394, + 0x3f50, 0x0f2c, 0x1b74, 0x3308, 0x0398, + 0x4050, 0x102c, 0x1c74, 0x3408, 0x0498, + 0x4150, 0x112c, 0x1d74, 0x3508, 0x0598, + 0x4154, 0x1130, 0x1d78, 0x350c, 0x059c, + 0x4054, 0x1030, 0x1c78, 0x340c, 0x049c, + 0x3f54, 0x0f30, 0x1b78, 0x330c, 0x039c, + 0x3f58, 0x0f34, 0x1b7c, 0x3310, 0x03a0, + 0x4058, 0x1034, 0x1c7c, 0x3410, 0x04a0, + 0x4158, 0x1134, 0x1d7c, 0x3510, 0x05a0, + 0x415c, 0x1138, 0x1d80, 0x3514, 0x05a4, + 0x405c, 0x1038, 0x1c80, 0x3414, 0x04a4, + 0x3f5c, 0x0f38, 0x1b80, 0x3314, 0x03a4, + 0x3f60, 0x0f3c, 0x1b84, 0x3318, 0x03a8, + 0x4060, 0x103c, 0x1c84, 0x3418, 0x04a8, + 0x4160, 0x113c, 0x1d84, 0x3518, 0x05a8, + 0x4164, 0x1140, 0x1d88, 0x351c, 0x05ac, + 0x4064, 0x1040, 0x1c88, 0x341c, 0x04ac, + 0x3f64, 0x0f40, 0x1b88, 0x331c, 0x03ac, + 0x3f68, 0x0f44, 0x1b8c, 0x3320, 0x03b0, + 0x4068, 0x1044, 0x1c8c, 0x3420, 0x04b0, + 0x4168, 0x1144, 0x1d8c, 0x3520, 0x05b0, + 0x4548, 0x1524, 0x216c, 0x3900, 0x0990, + 0x4648, 0x1624, 0x226c, 0x3a00, 0x0a90, + 0x4748, 0x1724, 0x236c, 0x3b00, 0x0b90, + 0x474c, 0x1728, 0x2370, 0x3b04, 0x0b94, + 0x464c, 0x1628, 0x2270, 0x3a04, 0x0a94, + 0x454c, 0x1528, 0x2170, 0x3904, 0x0994, + 0x4550, 0x152c, 0x2174, 0x3908, 0x0998, + 0x4650, 0x162c, 0x2274, 0x3a08, 0x0a98, + 0x4750, 0x172c, 0x2374, 0x3b08, 0x0b98, + 0x4754, 0x1730, 0x2378, 0x3b0c, 0x0b9c, + 0x4654, 0x1630, 0x2278, 0x3a0c, 0x0a9c, + 0x4554, 0x1530, 0x2178, 0x390c, 0x099c, + 0x4558, 0x1534, 0x217c, 0x3910, 0x09a0, + 0x4658, 0x1634, 0x227c, 0x3a10, 0x0aa0, + 0x4758, 0x1734, 0x237c, 0x3b10, 0x0ba0, + 0x475c, 0x1738, 0x2380, 0x3b14, 0x0ba4, + 0x465c, 0x1638, 0x2280, 0x3a14, 0x0aa4, + 0x455c, 0x1538, 0x2180, 0x3914, 0x09a4, + 0x4560, 0x153c, 0x2184, 0x3918, 0x09a8, + 0x4660, 0x163c, 0x2284, 0x3a18, 0x0aa8, + 0x4760, 0x173c, 
0x2384, 0x3b18, 0x0ba8, + 0x4764, 0x1740, 0x2388, 0x3b1c, 0x0bac, + 0x4664, 0x1640, 0x2288, 0x3a1c, 0x0aac, + 0x4564, 0x1540, 0x2188, 0x391c, 0x09ac, + 0x4568, 0x1544, 0x218c, 0x3920, 0x09b0, + 0x4668, 0x1644, 0x228c, 0x3a20, 0x0ab0, + 0x4768, 0x1744, 0x238c, 0x3b20, 0x0bb0, + 0x0348, 0x1b24, 0x276c, 0x3f00, 0x0f90, + 0x0448, 0x1c24, 0x286c, 0x4000, 0x1090, + 0x0548, 0x1d24, 0x296c, 0x4100, 0x1190, + 0x054c, 0x1d28, 0x2970, 0x4104, 0x1194, + 0x044c, 0x1c28, 0x2870, 0x4004, 0x1094, + 0x034c, 0x1b28, 0x2770, 0x3f04, 0x0f94, + 0x0350, 0x1b2c, 0x2774, 0x3f08, 0x0f98, + 0x0450, 0x1c2c, 0x2874, 0x4008, 0x1098, + 0x0550, 0x1d2c, 0x2974, 0x4108, 0x1198, + 0x0554, 0x1d30, 0x2978, 0x410c, 0x119c, + 0x0454, 0x1c30, 0x2878, 0x400c, 0x109c, + 0x0354, 0x1b30, 0x2778, 0x3f0c, 0x0f9c, + 0x0358, 0x1b34, 0x277c, 0x3f10, 0x0fa0, + 0x0458, 0x1c34, 0x287c, 0x4010, 0x10a0, + 0x0558, 0x1d34, 0x297c, 0x4110, 0x11a0, + 0x055c, 0x1d38, 0x2980, 0x4114, 0x11a4, + 0x045c, 0x1c38, 0x2880, 0x4014, 0x10a4, + 0x035c, 0x1b38, 0x2780, 0x3f14, 0x0fa4, + 0x0360, 0x1b3c, 0x2784, 0x3f18, 0x0fa8, + 0x0460, 0x1c3c, 0x2884, 0x4018, 0x10a8, + 0x0560, 0x1d3c, 0x2984, 0x4118, 0x11a8, + 0x0564, 0x1d40, 0x2988, 0x411c, 0x11ac, + 0x0464, 0x1c40, 0x2888, 0x401c, 0x10ac, + 0x0364, 0x1b40, 0x2788, 0x3f1c, 0x0fac, + 0x0368, 0x1b44, 0x278c, 0x3f20, 0x0fb0, + 0x0468, 0x1c44, 0x288c, 0x4020, 0x10b0, + 0x0568, 0x1d44, 0x298c, 0x4120, 0x11b0, + 0x0948, 0x2124, 0x2d6c, 0x4500, 0x1590, + 0x0a48, 0x2224, 0x2e6c, 0x4600, 0x1690, + 0x0b48, 0x2324, 0x2f6c, 0x4700, 0x1790, + 0x0b4c, 0x2328, 0x2f70, 0x4704, 0x1794, + 0x0a4c, 0x2228, 0x2e70, 0x4604, 0x1694, + 0x094c, 0x2128, 0x2d70, 0x4504, 0x1594, + 0x0950, 0x212c, 0x2d74, 0x4508, 0x1598, + 0x0a50, 0x222c, 0x2e74, 0x4608, 0x1698, + 0x0b50, 0x232c, 0x2f74, 0x4708, 0x1798, + 0x0b54, 0x2330, 0x2f78, 0x470c, 0x179c, + 0x0a54, 0x2230, 0x2e78, 0x460c, 0x169c, + 0x0954, 0x2130, 0x2d78, 0x450c, 0x159c, + 0x0958, 0x2134, 0x2d7c, 0x4510, 0x15a0, + 0x0a58, 0x2234, 0x2e7c, 0x4610, 0x16a0, + 
0x0b58, 0x2334, 0x2f7c, 0x4710, 0x17a0, + 0x0b5c, 0x2338, 0x2f80, 0x4714, 0x17a4, + 0x0a5c, 0x2238, 0x2e80, 0x4614, 0x16a4, + 0x095c, 0x2138, 0x2d80, 0x4514, 0x15a4, + 0x0960, 0x213c, 0x2d84, 0x4518, 0x15a8, + 0x0a60, 0x223c, 0x2e84, 0x4618, 0x16a8, + 0x0b60, 0x233c, 0x2f84, 0x4718, 0x17a8, + 0x0b64, 0x2340, 0x2f88, 0x471c, 0x17ac, + 0x0a64, 0x2240, 0x2e88, 0x461c, 0x16ac, + 0x0964, 0x2140, 0x2d88, 0x451c, 0x15ac, + 0x0968, 0x2144, 0x2d8c, 0x4520, 0x15b0, + 0x0a68, 0x2244, 0x2e8c, 0x4620, 0x16b0, + 0x0b68, 0x2344, 0x2f8c, 0x4720, 0x17b0, +}; + +/* DV25/50 DCT coefficient weights and inverse weights */ +/* created by dvtables.py */ +static const int dv_weight_bits = 18; +static const int dv_weight_88[64] = { + 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536, + 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935, + 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916, + 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433, + 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704, + 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568, + 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627, + 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258, +}; +static const int dv_weight_248[64] = { + 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754, + 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536, + 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568, + 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965, + 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627, + 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965, + 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364, + 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651, +}; +static const int dv_iweight_bits = 14; +static const int dv_iweight_88[64] = { + 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079, + 18079, 18197, 18725, 18559, 19196, 
18559, 18725, 19284, + 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262, + 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815, + 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400, + 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764, + 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191, + 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536, +}; +static const int dv_iweight_248[64] = { + 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196, + 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079, + 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764, + 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692, + 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191, + 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867, + 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173, + 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642, +}; + +static const uint16_t dv_audio_shuffle525[10][9] = { + { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */ + { 6, 36, 66, 26, 56, 86, 16, 46, 76 }, + { 12, 42, 72, 2, 32, 62, 22, 52, 82 }, + { 18, 48, 78, 8, 38, 68, 28, 58, 88 }, + { 24, 54, 84, 14, 44, 74, 4, 34, 64 }, + + { 1, 31, 61, 21, 51, 81, 11, 41, 71 }, /* 2nd channel */ + { 7, 37, 67, 27, 57, 87, 17, 47, 77 }, + { 13, 43, 73, 3, 33, 63, 23, 53, 83 }, + { 19, 49, 79, 9, 39, 69, 29, 59, 89 }, + { 25, 55, 85, 15, 45, 75, 5, 35, 65 }, +}; + +static const uint16_t dv_audio_shuffle625[12][9] = { + { 0, 36, 72, 26, 62, 98, 16, 52, 88}, /* 1st channel */ + { 6, 42, 78, 32, 68, 104, 22, 58, 94}, + { 12, 48, 84, 2, 38, 74, 28, 64, 100}, + { 18, 54, 90, 8, 44, 80, 34, 70, 106}, + { 24, 60, 96, 14, 50, 86, 4, 40, 76}, + { 30, 66, 102, 20, 56, 92, 10, 46, 82}, + + { 1, 37, 73, 27, 63, 99, 17, 53, 89}, /* 2nd channel */ + { 7, 43, 79, 33, 69, 105, 23, 59, 95}, + { 13, 49, 85, 3, 39, 75, 29, 65, 101}, + { 19, 55, 91, 9, 45, 81, 35, 71, 107}, + { 25, 61, 97, 15, 51, 87, 5, 41, 77}, + { 31, 67, 103, 21, 57, 93, 11, 47, 83}, +}; + +static const 
__attribute__((unused)) int dv_audio_frequency[3] = { + 48000, 44100, 32000, +}; + +static const DVprofile dv_profiles[] = { + { .dsf = 0, + .frame_size = 120000, /* IEC 61834, SMPTE-314M - 525/60 (NTSC) */ + .difseg_size = 10, + .n_difchan = 1, + .frame_rate = 30000, + .ltc_divisor = 30, + .frame_rate_base = 1001, + .height = 480, + .width = 720, + .sar = {{10, 11}, {40, 33}}, + .video_place = dv_place_411, + .pix_fmt = PIX_FMT_YUV411P, + .audio_stride = 90, + .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */ + .audio_shuffle = dv_audio_shuffle525, + }, + { .dsf = 1, + .frame_size = 144000, /* IEC 61834 - 625/50 (PAL) */ + .difseg_size = 12, + .n_difchan = 1, + .frame_rate = 25, + .frame_rate_base = 1, + .ltc_divisor = 25, + .height = 576, + .width = 720, + .sar = {{59, 54}, {118, 81}}, + .video_place = dv_place_420, + .pix_fmt = PIX_FMT_YUV420P, + .audio_stride = 108, + .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 }, + .audio_shuffle = dv_audio_shuffle625, + }, + { .dsf = 1, + .frame_size = 144000, /* SMPTE-314M - 625/50 (PAL) */ + .difseg_size = 12, + .n_difchan = 1, + .frame_rate = 25, + .frame_rate_base = 1, + .ltc_divisor = 25, + .height = 576, + .width = 720, + .sar = {{59, 54}, {118, 81}}, + .video_place = dv_place_411P, + .pix_fmt = PIX_FMT_YUV411P, + .audio_stride = 108, + .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 }, + .audio_shuffle = dv_audio_shuffle625, + }, + { .dsf = 0, + .frame_size = 240000, /* SMPTE-314M - 525/60 (NTSC) 50 Mbps */ + .difseg_size = 10, /* also known as "DVCPRO50" */ + .n_difchan = 2, + .frame_rate = 30000, + .ltc_divisor = 30, + .frame_rate_base = 1001, + .height = 480, + .width = 720, + .sar = {{10, 11}, {40, 33}}, + .video_place = dv_place_422_525, + .pix_fmt = 
PIX_FMT_YUV422P, + .audio_stride = 90, + .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */ + .audio_shuffle = dv_audio_shuffle525, + }, + { .dsf = 1, + .frame_size = 288000, /* SMPTE-314M - 625/50 (PAL) 50 Mbps */ + .difseg_size = 12, /* also known as "DVCPRO50" */ + .n_difchan = 2, + .frame_rate = 25, + .frame_rate_base = 1, + .ltc_divisor = 25, + .height = 576, + .width = 720, + .sar = {{59, 54}, {118, 81}}, + .video_place = dv_place_422_625, + .pix_fmt = PIX_FMT_YUV422P, + .audio_stride = 108, + .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 }, + .audio_shuffle = dv_audio_shuffle625, + } +}; + +/* minimum number of bytes to read from a DV stream in order to determine the profile */ +#define DV_PROFILE_BYTES (6*80) /* 6 DIF blocks */ + +/* largest possible DV frame, in bytes (PAL 50Mbps) */ +#define DV_MAX_FRAME_SIZE 288000 + +static inline const DVprofile* dv_frame_profile(uint8_t* frame) +{ + if ((frame[3] & 0x80) == 0) { /* DSF flag */ + /* it's an NTSC format */ + if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */ + return &dv_profiles[3]; /* NTSC 50Mbps */ + } else { /* 4:1:1 sampling */ + return &dv_profiles[0]; /* NTSC 25Mbps */ + } + } else { + /* it's a PAL format */ + if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */ + return &dv_profiles[4]; /* PAL 50Mbps */ + } else if ((frame[5] & 0x07) == 0) { /* APT flag */ + return &dv_profiles[1]; /* PAL 25Mbps 4:2:0 */ + } else + return &dv_profiles[2]; /* PAL 25Mbps 4:1:1 */ + } +} + +static inline const DVprofile* dv_codec_profile(AVCodecContext* codec) +{ + int i; + + if (codec->width != 720) + return NULL; + + for (i=0; iheight == dv_profiles[i].height && codec->pix_fmt == dv_profiles[i].pix_fmt) + return &dv_profiles[i]; + + return NULL; +} diff --git a/mpeg4/src/libavcodec/dvdsub.c b/mpeg4/src/libavcodec/dvdsub.c 
/**
 * Fetch one 4-bit value from a packed nibble stream.
 *
 * Even offsets select the high nibble of a byte, odd offsets the low nibble.
 * The caller is responsible for keeping nibble_offset inside the buffer.
 */
static int get_nibble(const uint8_t *buf, int nibble_offset)
{
    return (buf[nibble_offset >> 1] >> ((1 - (nibble_offset & 1)) << 2)) & 0xf;
}

/**
 * Decode run-length encoded 2-bit pixels into a bitmap.
 *
 * Each code is 1 to 4 nibbles long: the low two bits are the color, the
 * remaining upper bits are the run length. A 4-nibble code whose run length
 * is zero fills the rest of the current row. Runs are clipped to the row
 * width, and after every completed row the reader skips to the next byte
 * boundary.
 *
 * @param bitmap        destination; row r starts at bitmap + r * linesize
 * @param linesize      distance in bytes between destination rows
 * @param w             number of pixels per row
 * @param h             number of rows to decode
 * @param buf           encoded data
 * @param nibble_offset position of the first code, counted in nibbles from buf
 * @param buf_size      size of buf in bytes
 * @return 0 once h rows have been written (or when w or h is not positive,
 *         in which case nothing is read or written), -1 if the encoded data
 *         ends before the rows are complete
 */
static int decode_rle(uint8_t *bitmap, int linesize, int w, int h,
                      const uint8_t *buf, int nibble_offset, int buf_size)
{
    /* A code grows by one nibble while its value stays below the
     * corresponding limit (after 1, 2 and 3 nibbles respectively). */
    static const unsigned int extend_below[3] = { 0x4, 0x10, 0x40 };
    unsigned int v;
    int x, y, len, color, nibble_end, k;
    uint8_t *d;

    /* Nothing to decode; also keeps the memset length below non-negative. */
    if (w <= 0 || h <= 0)
        return 0;
    if (nibble_offset < 0)
        return -1;

    nibble_end = buf_size * 2;
    x = 0;
    y = 0;
    d = bitmap;
    for (;;) {
        if (nibble_offset >= nibble_end)
            return -1;
        v = get_nibble(buf, nibble_offset++);
        for (k = 0; k < 3 && v < extend_below[k]; k++) {
            /* Every extension nibble is bounds-checked, not only the first
             * nibble of the code, so a truncated code cannot read past buf. */
            if (nibble_offset >= nibble_end)
                return -1;
            v = (v << 4) | get_nibble(buf, nibble_offset++);
        }
        /* Full-length code with zero run length: fill to the end of the row. */
        if (k == 3 && v < 4)
            v |= (w - x) << 2;

        len = v >> 2;
        if (len > (w - x))
            len = (w - x);
        color = v & 0x03;
        memset(d + x, color, len);
        x += len;
        if (x >= w) {
            y++;
            if (y >= h)
                break;
            d += linesize;
            x = 0;
            /* byte align */
            nibble_offset += (nibble_offset & 1);
        }
    }
    return 0;
}
date=%d\n", + cmd_pos, next_cmd_pos, date); +#endif + pos = cmd_pos + 4; + offset1 = -1; + offset2 = -1; + x1 = y1 = x2 = y2 = 0; + while (pos < buf_size) { + cmd = buf[pos++]; +#ifdef DEBUG + av_log(NULL, AV_LOG_INFO, "cmd=%02x\n", cmd); +#endif + switch(cmd) { + case 0x00: + /* menu subpicture */ + is_menu = 1; + break; + case 0x01: + /* set start date */ + sub_header->start_display_time = (date << 10) / 90; + break; + case 0x02: + /* set end date */ + sub_header->end_display_time = (date << 10) / 90; + break; + case 0x03: + /* set palette */ + if ((buf_size - pos) < 2) + goto fail; + palette[3] = buf[pos] >> 4; + palette[2] = buf[pos] & 0x0f; + palette[1] = buf[pos + 1] >> 4; + palette[0] = buf[pos + 1] & 0x0f; + pos += 2; + break; + case 0x04: + /* set alpha */ + if ((buf_size - pos) < 2) + goto fail; + alpha[3] = buf[pos] >> 4; + alpha[2] = buf[pos] & 0x0f; + alpha[1] = buf[pos + 1] >> 4; + alpha[0] = buf[pos + 1] & 0x0f; + pos += 2; +#ifdef DEBUG + av_log(NULL, AV_LOG_INFO, "alpha=%x%x%x%x\n", alpha[0],alpha[1],alpha[2],alpha[3]); +#endif + break; + case 0x05: + if ((buf_size - pos) < 6) + goto fail; + x1 = (buf[pos] << 4) | (buf[pos + 1] >> 4); + x2 = ((buf[pos + 1] & 0x0f) << 8) | buf[pos + 2]; + y1 = (buf[pos + 3] << 4) | (buf[pos + 4] >> 4); + y2 = ((buf[pos + 4] & 0x0f) << 8) | buf[pos + 5]; +#ifdef DEBUG + av_log(NULL, AV_LOG_INFO, "x1=%d x2=%d y1=%d y2=%d\n", + x1, x2, y1, y2); +#endif + pos += 6; + break; + case 0x06: + if ((buf_size - pos) < 4) + goto fail; + offset1 = getbe16(buf + pos); + offset2 = getbe16(buf + pos + 2); +#ifdef DEBUG + av_log(NULL, AV_LOG_INFO, "offset1=0x%04x offset2=0x%04x\n", offset1, offset2); +#endif + pos += 4; + break; + case 0xff: + default: + goto the_end; + } + } + the_end: + if (offset1 >= 0) { + int w, h; + uint8_t *bitmap; + + /* decode the bitmap */ + w = x2 - x1 + 1; + if (w < 0) + w = 0; + h = y2 - y1; + if (h < 0) + h = 0; + if (w > 0 && h > 0) { + if (sub_header->rects != NULL) { + for (i = 0; i < 
/**
 * Test whether a strided run of palette indices is entirely transparent.
 *
 * @param buf          first pixel to examine
 * @param pitch        distance in bytes between consecutive examined pixels
 * @param n            number of pixels to examine
 * @param transp_color lookup table indexed by pixel value; a nonzero entry
 *                     marks that value as transparent
 * @return 1 if every examined pixel is transparent (including when n is not
 *         positive), 0 as soon as a non-transparent pixel is found
 */
static int is_transp(const uint8_t *buf, int pitch, int n,
                     const uint8_t *transp_color)
{
    const uint8_t *px = buf;
    int remaining = n;

    while (remaining > 0) {
        if (transp_color[*px] == 0)
            return 0;
        px += pitch;
        remaining--;
    }
    return 1;
}
+ s->rects[0].w, transp_color)) + y2--; + x1 = 0; + while (x1 < (s->rects[0].w - 1) && is_transp(s->rects[0].bitmap + x1, s->rects[0].linesize, + s->rects[0].h, transp_color)) + x1++; + x2 = s->rects[0].w - 1; + while (x2 > 0 && is_transp(s->rects[0].bitmap + x2, s->rects[0].linesize, s->rects[0].h, + transp_color)) + x2--; + w = x2 - x1 + 1; + h = y2 - y1 + 1; + bitmap = av_malloc(w * h); + if (!bitmap) + return 1; + for(y = 0; y < h; y++) { + memcpy(bitmap + w * y, s->rects[0].bitmap + x1 + (y1 + y) * s->rects[0].linesize, w); + } + av_freep(&s->rects[0].bitmap); + s->rects[0].bitmap = bitmap; + s->rects[0].linesize = w; + s->rects[0].w = w; + s->rects[0].h = h; + s->rects[0].x += x1; + s->rects[0].y += y1; + return 1; +} + +static int dvdsub_close_decoder(AVCodecContext *avctx) +{ + return 0; +} + +#ifdef DEBUG +#undef fprintf +static void ppm_save(const char *filename, uint8_t *bitmap, int w, int h, + uint32_t *rgba_palette) +{ + int x, y, v; + FILE *f; + + f = fopen(filename, "w"); + if (!f) { + perror(filename); + exit(1); + } + fprintf(f, "P6\n" + "%d %d\n" + "%d\n", + w, h, 255); + for(y = 0; y < h; y++) { + for(x = 0; x < w; x++) { + v = rgba_palette[bitmap[y * w + x]]; + putc((v >> 16) & 0xff, f); + putc((v >> 8) & 0xff, f); + putc((v >> 0) & 0xff, f); + } + } + fclose(f); +} +#endif + +static int dvdsub_decode(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AVSubtitle *sub = (void *)data; + int is_menu; + + is_menu = decode_dvd_subtitles(sub, buf, buf_size); + + if (is_menu < 0) { + no_subtitle: + *data_size = 0; + + return buf_size; + } + if (!is_menu && find_smallest_bounding_rectangle(sub) == 0) + goto no_subtitle; + +#if defined(DEBUG) + av_log(NULL, AV_LOG_INFO, "start=%d ms end =%d ms\n", + sub->start_display_time, + sub->end_display_time); + ppm_save("/tmp/a.ppm", sub->rects[0].bitmap, + sub->rects[0].w, sub->rects[0].h, sub->rects[0].rgba_palette); +#endif + + *data_size = 1; + return buf_size; +} + 
+AVCodec dvdsub_decoder = { + "dvdsub", + CODEC_TYPE_SUBTITLE, + CODEC_ID_DVD_SUBTITLE, + sizeof(DVDSubContext), + dvdsub_init_decoder, + NULL, + dvdsub_close_decoder, + dvdsub_decode, +}; + +/* parser definition */ +typedef struct DVDSubParseContext { + uint8_t *packet; + int packet_len; + int packet_index; +} DVDSubParseContext; + +static int dvdsub_parse_init(AVCodecParserContext *s) +{ + return 0; +} + +static int dvdsub_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + DVDSubParseContext *pc = s->priv_data; + + if (pc->packet_index == 0) { + if (buf_size < 2) + return 0; + pc->packet_len = (buf[0] << 8) | buf[1]; + av_freep(&pc->packet); + pc->packet = av_malloc(pc->packet_len); + } + if (pc->packet) { + if (pc->packet_index + buf_size <= pc->packet_len) { + memcpy(pc->packet + pc->packet_index, buf, buf_size); + pc->packet_index += buf_size; + if (pc->packet_index >= pc->packet_len) { + *poutbuf = pc->packet; + *poutbuf_size = pc->packet_len; + pc->packet_index = 0; + return buf_size; + } + } else { + /* erroneous size */ + pc->packet_index = 0; + } + } + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; +} + +static void dvdsub_parse_close(AVCodecParserContext *s) +{ + DVDSubParseContext *pc = s->priv_data; + av_freep(&pc->packet); +} + +AVCodecParser dvdsub_parser = { + { CODEC_ID_DVD_SUBTITLE }, + sizeof(DVDSubParseContext), + dvdsub_parse_init, + dvdsub_parse, + dvdsub_parse_close, +}; diff --git a/mpeg4/src/libavcodec/dvdsubenc.c b/mpeg4/src/libavcodec/dvdsubenc.c new file mode 100644 index 0000000000000000000000000000000000000000..1d423af5b37738c59dd6cfd49f01ee364cfaea78 --- /dev/null +++ b/mpeg4/src/libavcodec/dvdsubenc.c @@ -0,0 +1,248 @@ +/* + * DVD subtitle encoding for ffmpeg + * Copyright (c) 2005 Wolfram Gloger. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "avcodec.h" + +#undef NDEBUG +#include + +typedef struct DVDSubtitleContext { +} DVDSubtitleContext; + +// ncnt is the nibble counter +#define PUTNIBBLE(val)\ +do {\ + if (ncnt++ & 1)\ + *q++ = bitbuf | ((val) & 0x0f);\ + else\ + bitbuf = (val) << 4;\ +} while(0) + +static void dvd_encode_rle(uint8_t **pq, + const uint8_t *bitmap, int linesize, + int w, int h, + const int cmap[256]) +{ + uint8_t *q; + unsigned int bitbuf = 0; + int ncnt; + int x, y, len, color; + + q = *pq; + + for (y = 0; y < h; ++y) { + ncnt = 0; + for(x = 0; x < w; x += len) { + color = bitmap[x]; + for (len=1; x+len < w; ++len) + if (bitmap[x+len] != color) + break; + color = cmap[color]; + assert(color < 4); + if (len < 0x04) { + PUTNIBBLE((len << 2)|color); + } else if (len < 0x10) { + PUTNIBBLE(len >> 2); + PUTNIBBLE((len << 2)|color); + } else if (len < 0x40) { + PUTNIBBLE(0); + PUTNIBBLE(len >> 2); + PUTNIBBLE((len << 2)|color); + } else if (x+len == w) { + PUTNIBBLE(0); + PUTNIBBLE(0); + PUTNIBBLE(0); + PUTNIBBLE(color); + } else { + if (len > 0xff) + len = 0xff; + PUTNIBBLE(0); + PUTNIBBLE(len >> 6); + PUTNIBBLE(len >> 2); + PUTNIBBLE((len << 2)|color); + } + } + /* end of line */ + if (ncnt & 1) + PUTNIBBLE(0); + bitmap += linesize; 
+ } + + *pq = q; +} + +static inline void putbe16(uint8_t **pq, uint16_t v) +{ + uint8_t *q = *pq; + *q++ = v >> 8; + *q++ = v; + *pq = q; +} + +static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size, + const AVSubtitle *h) +{ + uint8_t *q, *qq; + int object_id; + int offset1[20], offset2[20]; + int i, imax, color, alpha, rects = h->num_rects; + unsigned long hmax; + unsigned long hist[256]; + int cmap[256]; + + if (rects == 0 || h->rects == NULL) + return -1; + if (rects > 20) + rects = 20; + + // analyze bitmaps, compress to 4 colors + for (i=0; i<256; ++i) { + hist[i] = 0; + cmap[i] = 0; + } + for (object_id = 0; object_id < rects; object_id++) + for (i=0; irects[object_id].w*h->rects[object_id].h; ++i) { + color = h->rects[object_id].bitmap[i]; + // only count non-transparent pixels + alpha = h->rects[object_id].rgba_palette[color] >> 24; + hist[color] += alpha; + } + for (color=3;; --color) { + hmax = 0; + imax = 0; + for (i=0; i<256; ++i) + if (hist[i] > hmax) { + imax = i; + hmax = hist[i]; + } + if (hmax == 0) + break; + if (color == 0) + color = 3; + av_log(NULL, AV_LOG_DEBUG, "dvd_subtitle hist[%d]=%ld -> col %d\n", + imax, hist[imax], color); + cmap[imax] = color; + hist[imax] = 0; + } + + + // encode data block + q = outbuf + 4; + for (object_id = 0; object_id < rects; object_id++) { + offset1[object_id] = q - outbuf; + // worst case memory requirement: 1 nibble per pixel.. 
+ if ((q - outbuf) + h->rects[object_id].w*h->rects[object_id].h/2 + + 17*rects + 21 > outbuf_size) { + av_log(NULL, AV_LOG_ERROR, "dvd_subtitle too big\n"); + return -1; + } + dvd_encode_rle(&q, h->rects[object_id].bitmap, + h->rects[object_id].w*2, + h->rects[object_id].w, h->rects[object_id].h >> 1, + cmap); + offset2[object_id] = q - outbuf; + dvd_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w, + h->rects[object_id].w*2, + h->rects[object_id].w, h->rects[object_id].h >> 1, + cmap); + } + + // set data packet size + qq = outbuf + 2; + putbe16(&qq, q - outbuf); + + // send start display command + putbe16(&q, (h->start_display_time*90) >> 10); + putbe16(&q, (q - outbuf) /*- 2 */ + 8 + 12*rects + 2); + *q++ = 0x03; // palette - 4 nibbles + *q++ = 0x03; *q++ = 0x7f; + *q++ = 0x04; // alpha - 4 nibbles + *q++ = 0xf0; *q++ = 0x00; + //*q++ = 0x0f; *q++ = 0xff; + + // XXX not sure if more than one rect can really be encoded.. + // 12 bytes per rect + for (object_id = 0; object_id < rects; object_id++) { + int x2 = h->rects[object_id].x + h->rects[object_id].w - 1; + int y2 = h->rects[object_id].y + h->rects[object_id].h - 1; + + *q++ = 0x05; + // x1 x2 -> 6 nibbles + *q++ = h->rects[object_id].x >> 4; + *q++ = (h->rects[object_id].x << 4) | ((x2 >> 8) & 0xf); + *q++ = x2; + // y1 y2 -> 6 nibbles + *q++ = h->rects[object_id].y >> 4; + *q++ = (h->rects[object_id].y << 4) | ((y2 >> 8) & 0xf); + *q++ = y2; + + *q++ = 0x06; + // offset1, offset2 + putbe16(&q, offset1[object_id]); + putbe16(&q, offset2[object_id]); + } + *q++ = 0x01; // start command + *q++ = 0xff; // terminating command + + // send stop display command last + putbe16(&q, (h->end_display_time*90) >> 10); + putbe16(&q, (q - outbuf) - 2 /*+ 4*/); + *q++ = 0x02; // set end + *q++ = 0xff; // terminating command + + qq = outbuf; + putbe16(&qq, q - outbuf); + + av_log(NULL, AV_LOG_DEBUG, "subtitle_packet size=%td\n", q - outbuf); + return q - outbuf; +} + +static int 
dvdsub_init_encoder(AVCodecContext *avctx) +{ + return 0; +} + +static int dvdsub_close_encoder(AVCodecContext *avctx) +{ + return 0; +} + +static int dvdsub_encode(AVCodecContext *avctx, + unsigned char *buf, int buf_size, void *data) +{ + //DVDSubtitleContext *s = avctx->priv_data; + AVSubtitle *sub = data; + int ret; + + ret = encode_dvd_subtitles(buf, buf_size, sub); + return ret; +} + +AVCodec dvdsub_encoder = { + "dvdsub", + CODEC_TYPE_SUBTITLE, + CODEC_ID_DVD_SUBTITLE, + sizeof(DVDSubtitleContext), + dvdsub_init_encoder, + dvdsub_encode, + dvdsub_close_encoder, +}; + +/* Local Variables: */ +/* c-basic-offset:4 */ +/* End: */ diff --git a/mpeg4/src/libavcodec/error_resilience.c b/mpeg4/src/libavcodec/error_resilience.c new file mode 100644 index 0000000000000000000000000000000000000000..9912044ecc97a541cf4bb274e410f2d6c9a5dcb8 --- /dev/null +++ b/mpeg4/src/libavcodec/error_resilience.c @@ -0,0 +1,1028 @@ +/* + * Error resilience / concealment + * + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file error_resilience.c + * Error resilience / concealment. 
+ */ + +#include + +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" +#include "common.h" + +static void decode_mb(MpegEncContext *s){ + s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* s->linesize ) + s->mb_x * 16; + s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8; + s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8; + + MPV_decode_mb(s, s->block); +} + +/** + * replaces the current MB with a flat dc only version. + */ +static void put_dc(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int mb_x, int mb_y) +{ + int dc, dcu, dcv, y, i; + for(i=0; i<4; i++){ + dc= s->dc_val[0][mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride]; + if(dc<0) dc=0; + else if(dc>2040) dc=2040; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dest_y[x + (i&1)*8 + (y + (i>>1)*8)*s->linesize]= dc/8; + } + } + } + dcu = s->dc_val[1][mb_x + mb_y*s->mb_stride]; + dcv = s->dc_val[2][mb_x + mb_y*s->mb_stride]; + if (dcu<0 ) dcu=0; + else if(dcu>2040) dcu=2040; + if (dcv<0 ) dcv=0; + else if(dcv>2040) dcv=2040; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dest_cb[x + y*(s->uvlinesize)]= dcu/8; + dest_cr[x + y*(s->uvlinesize)]= dcv/8; + } + } +} + +static void filter181(int16_t *data, int width, int height, int stride){ + int x,y; + + /* horizontal filter */ + for(y=1; y>16; + prev_dc= data[x + y*stride]; + data[x + y*stride]= dc; + } + } + + /* vertical filter */ + for(x=1; x>16; + prev_dc= data[x + y*stride]; + data[x + y*stride]= dc; + } + } +} + +/** + * guess the dc of blocks which dont have a undamaged dc + * @param w width in 8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){ + int b_x, b_y; + + for(b_y=0; b_y>is_luma) + (b_y>>is_luma)*s->mb_stride; + + error= s->error_status_table[mb_index]; + + if(IS_INTER(s->current_picture.mb_type[mb_index])) continue; 
//inter + if(!(error&DC_ERROR)) continue; //dc-ok + + /* right block */ + for(j=b_x+1; j>is_luma) + (b_y>>is_luma)*s->mb_stride; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]); + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[0]= dc[j + b_y*stride]; + distance[0]= j-b_x; + break; + } + } + + /* left block */ + for(j=b_x-1; j>=0; j--){ + int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_stride; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]); + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[1]= dc[j + b_y*stride]; + distance[1]= b_x-j; + break; + } + } + + /* bottom block */ + for(j=b_y+1; j>is_luma) + (j>>is_luma)*s->mb_stride; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]); + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[2]= dc[b_x + j*stride]; + distance[2]= j-b_y; + break; + } + } + + /* top block */ + for(j=b_y-1; j>=0; j--){ + int mb_index_j= (b_x>>is_luma) + (j>>is_luma)*s->mb_stride; + int error_j= s->error_status_table[mb_index_j]; + int intra_j= IS_INTRA(s->current_picture.mb_type[mb_index_j]); + if(intra_j==0 || !(error_j&DC_ERROR)){ + color[3]= dc[b_x + j*stride]; + distance[3]= b_y-j; + break; + } + } + + weight_sum=0; + guess=0; + for(j=0; j<4; j++){ + int64_t weight= 256*256*256*16/distance[j]; + guess+= weight*(int64_t)color[j]; + weight_sum+= weight; + } + guess= (guess + weight_sum/2) / weight_sum; + + dc[b_x + b_y*stride]= guess; + } + } +} + +/** + * simple horizontal deblocking filter used for error resilience + * @param w width in 8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ + int b_x, b_y; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + for(b_y=0; b_yerror_status_table[( b_x >>is_luma) + (b_y>>is_luma)*s->mb_stride]; + int right_status= 
s->error_status_table[((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_stride]; + int left_intra= IS_INTRA(s->current_picture.mb_type [( b_x >>is_luma) + (b_y>>is_luma)*s->mb_stride]); + int right_intra= IS_INTRA(s->current_picture.mb_type [((b_x+1)>>is_luma) + (b_y>>is_luma)*s->mb_stride]); + int left_damage = left_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int right_damage= right_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int offset= b_x*8 + b_y*stride*8; + int16_t *left_mv= s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ( b_x <<(1-is_luma))]; + int16_t *right_mv= s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ((b_x+1)<<(1-is_luma))]; + + if(!(left_damage||right_damage)) continue; // both undamaged + + if( (!left_intra) && (!right_intra) + && ABS(left_mv[0]-right_mv[0]) + ABS(left_mv[1]+right_mv[1]) < 2) continue; + + for(y=0; y<8; y++){ + int a,b,c,d; + + a= dst[offset + 7 + y*stride] - dst[offset + 6 + y*stride]; + b= dst[offset + 8 + y*stride] - dst[offset + 7 + y*stride]; + c= dst[offset + 9 + y*stride] - dst[offset + 8 + y*stride]; + + d= ABS(b) - ((ABS(a) + ABS(c) + 1)>>1); + d= FFMAX(d, 0); + if(b<0) d= -d; + + if(d==0) continue; + + if(!(left_damage && right_damage)) + d= d*16/9; + + if(left_damage){ + dst[offset + 7 + y*stride] = cm[dst[offset + 7 + y*stride] + ((d*7)>>4)]; + dst[offset + 6 + y*stride] = cm[dst[offset + 6 + y*stride] + ((d*5)>>4)]; + dst[offset + 5 + y*stride] = cm[dst[offset + 5 + y*stride] + ((d*3)>>4)]; + dst[offset + 4 + y*stride] = cm[dst[offset + 4 + y*stride] + ((d*1)>>4)]; + } + if(right_damage){ + dst[offset + 8 + y*stride] = cm[dst[offset + 8 + y*stride] - ((d*7)>>4)]; + dst[offset + 9 + y*stride] = cm[dst[offset + 9 + y*stride] - ((d*5)>>4)]; + dst[offset + 10+ y*stride] = cm[dst[offset +10 + y*stride] - ((d*3)>>4)]; + dst[offset + 11+ y*stride] = cm[dst[offset +11 + y*stride] - ((d*1)>>4)]; + } + } + } + } +} + +/** + * simple vertical deblocking filter used for error resilience + * @param w width in 
8 pixel blocks + * @param h height in 8 pixel blocks + */ +static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){ + int b_x, b_y; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + for(b_y=0; b_yerror_status_table[(b_x>>is_luma) + ( b_y >>is_luma)*s->mb_stride]; + int bottom_status= s->error_status_table[(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_stride]; + int top_intra= IS_INTRA(s->current_picture.mb_type [(b_x>>is_luma) + ( b_y >>is_luma)*s->mb_stride]); + int bottom_intra= IS_INTRA(s->current_picture.mb_type [(b_x>>is_luma) + ((b_y+1)>>is_luma)*s->mb_stride]); + int top_damage = top_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int bottom_damage= bottom_status&(DC_ERROR|AC_ERROR|MV_ERROR); + int offset= b_x*8 + b_y*stride*8; + int16_t *top_mv= s->current_picture.motion_val[0][s->b8_stride*( b_y <<(1-is_luma)) + (b_x<<(1-is_luma))]; + int16_t *bottom_mv= s->current_picture.motion_val[0][s->b8_stride*((b_y+1)<<(1-is_luma)) + (b_x<<(1-is_luma))]; + + if(!(top_damage||bottom_damage)) continue; // both undamaged + + if( (!top_intra) && (!bottom_intra) + && ABS(top_mv[0]-bottom_mv[0]) + ABS(top_mv[1]+bottom_mv[1]) < 2) continue; + + for(x=0; x<8; x++){ + int a,b,c,d; + + a= dst[offset + x + 7*stride] - dst[offset + x + 6*stride]; + b= dst[offset + x + 8*stride] - dst[offset + x + 7*stride]; + c= dst[offset + x + 9*stride] - dst[offset + x + 8*stride]; + + d= ABS(b) - ((ABS(a) + ABS(c)+1)>>1); + d= FFMAX(d, 0); + if(b<0) d= -d; + + if(d==0) continue; + + if(!(top_damage && bottom_damage)) + d= d*16/9; + + if(top_damage){ + dst[offset + x + 7*stride] = cm[dst[offset + x + 7*stride] + ((d*7)>>4)]; + dst[offset + x + 6*stride] = cm[dst[offset + x + 6*stride] + ((d*5)>>4)]; + dst[offset + x + 5*stride] = cm[dst[offset + x + 5*stride] + ((d*3)>>4)]; + dst[offset + x + 4*stride] = cm[dst[offset + x + 4*stride] + ((d*1)>>4)]; + } + if(bottom_damage){ + dst[offset + x + 8*stride] = cm[dst[offset + x + 8*stride] - ((d*7)>>4)]; + dst[offset + x + 
9*stride] = cm[dst[offset + x + 9*stride] - ((d*5)>>4)]; + dst[offset + x + 10*stride] = cm[dst[offset + x + 10*stride] - ((d*3)>>4)]; + dst[offset + x + 11*stride] = cm[dst[offset + x + 11*stride] - ((d*1)>>4)]; + } + } + } + } +} + +static void guess_mv(MpegEncContext *s){ + uint8_t fixed[s->mb_stride * s->mb_height]; +#define MV_FROZEN 3 +#define MV_CHANGED 2 +#define MV_UNCHANGED 1 + const int mb_stride = s->mb_stride; + const int mb_width = s->mb_width; + const int mb_height= s->mb_height; + int i, depth, num_avail; + int mb_x, mb_y; + + num_avail=0; + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[ i ]; + int f=0; + int error= s->error_status_table[mb_xy]; + + if(IS_INTRA(s->current_picture.mb_type[mb_xy])) f=MV_FROZEN; //intra //FIXME check + if(!(error&MV_ERROR)) f=MV_FROZEN; //inter with undamaged MV + + fixed[mb_xy]= f; + if(f==MV_FROZEN) + num_avail++; + } + + if((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || num_avail <= mb_width/2){ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + const int mb_xy= mb_x + mb_y*s->mb_stride; + + if(IS_INTRA(s->current_picture.mb_type[mb_xy])) continue; + if(!(s->error_status_table[mb_xy]&MV_ERROR)) continue; + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skipped=0; + + s->dsp.clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + s->mv[0][0][0]= 0; + s->mv[0][0][1]= 0; + decode_mb(s); + } + } + return; + } + + for(depth=0;; depth++){ + int changed, pass, none_left; + + none_left=1; + changed=1; + for(pass=0; (changed || pass<2) && pass<10; pass++){ + int mb_x, mb_y; +int score_sum=0; + + changed=0; + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + const int mb_xy= mb_x + mb_y*s->mb_stride; + int mv_predictor[8][2]={{0}}; + int pred_count=0; + int j; + int best_score=256*256*256*64; + int best_pred=0; + const int mot_stride= s->b8_stride; + const int mot_index= mb_x*2 + mb_y*2*mot_stride; + int prev_x= 
s->current_picture.motion_val[0][mot_index][0]; + int prev_y= s->current_picture.motion_val[0][mot_index][1]; + + if((mb_x^mb_y^pass)&1) continue; + + if(fixed[mb_xy]==MV_FROZEN) continue; + assert(!IS_INTRA(s->current_picture.mb_type[mb_xy])); + assert(s->last_picture_ptr && s->last_picture_ptr->data[0]); + + j=0; + if(mb_x>0 && fixed[mb_xy-1 ]==MV_FROZEN) j=1; + if(mb_x+10 && fixed[mb_xy-mb_stride]==MV_FROZEN) j=1; + if(mb_y+10 && fixed[mb_xy-1 ]==MV_CHANGED) j=1; + if(mb_x+10 && fixed[mb_xy-mb_stride]==MV_CHANGED) j=1; + if(mb_y+11) continue; + + none_left=0; + + if(mb_x>0 && fixed[mb_xy-1]){ + mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - 2][0]; + mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - 2][1]; + pred_count++; + } + if(mb_x+1current_picture.motion_val[0][mot_index + 2][0]; + mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + 2][1]; + pred_count++; + } + if(mb_y>0 && fixed[mb_xy-mb_stride]){ + mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - mot_stride*2][0]; + mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - mot_stride*2][1]; + pred_count++; + } + if(mb_y+1current_picture.motion_val[0][mot_index + mot_stride*2][0]; + mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index + mot_stride*2][1]; + pred_count++; + } + if(pred_count==0) continue; + + if(pred_count>1){ + int sum_x=0, sum_y=0; + int max_x, max_y, min_x, min_y; + + for(j=0; j=3){ + min_y= min_x= 99999; + max_y= max_x=-99999; + }else{ + min_x=min_y=max_x=max_y=0; + } + for(j=0; jcurrent_picture.motion_val[0][mot_index][0]; + mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1]; + pred_count++; + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skipped=0; + + s->dsp.clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + + for(j=0; jcurrent_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; + 
+ s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[j][0]; + s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[j][1]; + + decode_mb(s); + + if(mb_x>0 && fixed[mb_xy-1]){ + int k; + for(k=0; k<16; k++) + score += ABS(src[k*s->linesize-1 ]-src[k*s->linesize ]); + } + if(mb_x+1linesize+15]-src[k*s->linesize+16]); + } + if(mb_y>0 && fixed[mb_xy-mb_stride]){ + int k; + for(k=0; k<16; k++) + score += ABS(src[k-s->linesize ]-src[k ]); + } + if(mb_y+1linesize*15]-src[k+s->linesize*16]); + } + + if(score <= best_score){ // <= will favor the last MV + best_score= score; + best_pred= j; + } + } +score_sum+= best_score; +//FIXME no need to set s->current_picture.motion_val[0][mot_index][0] explicit + s->current_picture.motion_val[0][mot_index][0]= s->mv[0][0][0]= mv_predictor[best_pred][0]; + s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[best_pred][1]; + + decode_mb(s); + + + if(s->mv[0][0][0] != prev_x || s->mv[0][0][1] != prev_y){ + fixed[mb_xy]=MV_CHANGED; + changed++; + }else + fixed[mb_xy]=MV_UNCHANGED; + } + } + +// printf(".%d/%d", changed, score_sum); fflush(stdout); + } + + if(none_left) + return; + + for(i=0; imb_num; i++){ + int mb_xy= s->mb_index2xy[i]; + if(fixed[mb_xy]) + fixed[mb_xy]=MV_FROZEN; + } +// printf(":"); fflush(stdout); + } +} + +static int is_intra_more_likely(MpegEncContext *s){ + int is_intra_likely, i, j, undamaged_count, skip_amount, mb_x, mb_y; + + if(s->last_picture_ptr==NULL) return 1; //no previous frame available -> use spatial prediction + + undamaged_count=0; + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + const int error= s->error_status_table[mb_xy]; + if(!((error&DC_ERROR) && (error&MV_ERROR))) + undamaged_count++; + } + + if(undamaged_count < 5) return 0; //allmost all MBs damaged -> use temporal prediction + + skip_amount= FFMAX(undamaged_count/50, 1); //check only upto 50 MBs + is_intra_likely=0; + + j=0; + for(mb_y= 0; 
mb_ymb_height-1; mb_y++){ + for(mb_x= 0; mb_xmb_width; mb_x++){ + int error; + const int mb_xy= mb_x + mb_y*s->mb_stride; + + error= s->error_status_table[mb_xy]; + if((error&DC_ERROR) && (error&MV_ERROR)) + continue; //skip damaged + + j++; + if((j%skip_amount) != 0) continue; //skip a few to speed things up + + if(s->pict_type==I_TYPE){ + uint8_t *mb_ptr = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; + uint8_t *last_mb_ptr= s->last_picture.data [0] + mb_x*16 + mb_y*16*s->linesize; + + is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr , s->linesize, 16); + is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16); + }else{ + if(IS_INTRA(s->current_picture.mb_type[mb_xy])) + is_intra_likely++; + else + is_intra_likely--; + } + } + } +//printf("is_intra_likely: %d type:%d\n", is_intra_likely, s->pict_type); + return is_intra_likely > 0; +} + +void ff_er_frame_start(MpegEncContext *s){ + if(!s->error_resilience) return; + + memset(s->error_status_table, MV_ERROR|AC_ERROR|DC_ERROR|VP_START|AC_END|DC_END|MV_END, s->mb_stride*s->mb_height*sizeof(uint8_t)); + s->error_count= 3*s->mb_num; +} + +/** + * adds a slice. 
+ * @param endx x component of the last macroblock, can be -1 for the last of the previous line + * @param status the status at the end (MV_END, AC_ERROR, ...), it is assumed that no earlier end or + * error of the same type occured + */ +void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int endy, int status){ + const int start_i= clip(startx + starty * s->mb_width , 0, s->mb_num-1); + const int end_i = clip(endx + endy * s->mb_width , 0, s->mb_num); + const int start_xy= s->mb_index2xy[start_i]; + const int end_xy = s->mb_index2xy[end_i]; + int mask= -1; + + if(!s->error_resilience) return; + + mask &= ~VP_START; + if(status & (AC_ERROR|AC_END)){ + mask &= ~(AC_ERROR|AC_END); + s->error_count -= end_i - start_i + 1; + } + if(status & (DC_ERROR|DC_END)){ + mask &= ~(DC_ERROR|DC_END); + s->error_count -= end_i - start_i + 1; + } + if(status & (MV_ERROR|MV_END)){ + mask &= ~(MV_ERROR|MV_END); + s->error_count -= end_i - start_i + 1; + } + + if(status & (AC_ERROR|DC_ERROR|MV_ERROR)) s->error_count= INT_MAX; + + if(mask == ~0x7F){ + memset(&s->error_status_table[start_xy], 0, (end_xy - start_xy) * sizeof(uint8_t)); + }else{ + int i; + for(i=start_xy; ierror_status_table[ i ] &= mask; + } + } + + if(end_i == s->mb_num) + s->error_count= INT_MAX; + else{ + s->error_status_table[end_xy] &= mask; + s->error_status_table[end_xy] |= status; + } + + s->error_status_table[start_xy] |= VP_START; + + if(start_xy > 0 && s->avctx->thread_count <= 1 && s->avctx->skip_top*s->mb_width < start_i){ + int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ]; + + prev_status &= ~ VP_START; + if(prev_status != (MV_END|DC_END|AC_END)) s->error_count= INT_MAX; + } +} + +void ff_er_frame_end(MpegEncContext *s){ + int i, mb_x, mb_y, error, error_type, dc_error, mv_error, ac_error; + int distance; + int threshold_part[4]= {100,100,100}; + int threshold= 50; + int is_intra_likely; + int size = s->b8_stride * 2 * s->mb_height; + Picture *pic= 
s->current_picture_ptr; + + if(!s->error_resilience || s->error_count==0 || + s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return; + + if(s->current_picture.motion_val[0] == NULL){ + av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n"); + + for(i=0; i<2; i++){ + pic->ref_index[i]= av_mallocz(size * sizeof(uint8_t)); + pic->motion_val_base[i]= av_mallocz((size+4) * 2 * sizeof(uint16_t)); + pic->motion_val[i]= pic->motion_val_base[i]+4; + } + pic->motion_subsample_log2= 3; + s->current_picture= *s->current_picture_ptr; + } + + for(i=0; i<2; i++){ + if(pic->ref_index[i]) + memset(pic->ref_index[i], 0, size * sizeof(uint8_t)); + } + + if(s->avctx->debug&FF_DEBUG_ER){ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int status= s->error_status_table[mb_x + mb_y*s->mb_stride]; + + av_log(s->avctx, AV_LOG_DEBUG, "%2X ", status); + } + av_log(s->avctx, AV_LOG_DEBUG, "\n"); + } + } + +#if 1 + /* handle overlapping slices */ + for(error_type=1; error_type<=3; error_type++){ + int end_ok=0; + + for(i=s->mb_num-1; i>=0; i--){ + const int mb_xy= s->mb_index2xy[i]; + int error= s->error_status_table[mb_xy]; + + if(error&(1<error_status_table[mb_xy]|= 1<partitioned_frame){ + int end_ok=0; + + for(i=s->mb_num-1; i>=0; i--){ + const int mb_xy= s->mb_index2xy[i]; + int error= s->error_status_table[mb_xy]; + + if(error&AC_END) + end_ok=0; + if((error&MV_END) || (error&DC_END) || (error&AC_ERROR)) + end_ok=1; + + if(!end_ok) + s->error_status_table[mb_xy]|= AC_ERROR; + + if(error&VP_START) + end_ok=0; + } + } +#endif + /* handle missing slices */ + if(s->error_resilience>=4){ + int end_ok=1; + + for(i=s->mb_num-2; i>=s->mb_width+100; i--){ //FIXME +100 hack + const int mb_xy= s->mb_index2xy[i]; + int error1= s->error_status_table[mb_xy ]; + int error2= s->error_status_table[s->mb_index2xy[i+1]]; + + if(error1&VP_START) + end_ok=1; + + if( error2==(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END) + && 
error1!=(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END) + && ((error1&AC_END) || (error1&DC_END) || (error1&MV_END))){ //end & uninited + end_ok=0; + } + + if(!end_ok) + s->error_status_table[mb_xy]|= DC_ERROR|AC_ERROR|MV_ERROR; + } + } + +#if 1 + /* backward mark errors */ + distance=9999999; + for(error_type=1; error_type<=3; error_type++){ + for(i=s->mb_num-1; i>=0; i--){ + const int mb_xy= s->mb_index2xy[i]; + int error= s->error_status_table[mb_xy]; + + if(!s->mbskip_table[mb_xy]) //FIXME partition specific + distance++; + if(error&(1<partitioned_frame){ + if(distance < threshold_part[error_type-1]) + s->error_status_table[mb_xy]|= 1<error_status_table[mb_xy]|= 1<mb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + int old_error= s->error_status_table[mb_xy]; + + if(old_error&VP_START) + error= old_error& (DC_ERROR|AC_ERROR|MV_ERROR); + else{ + error|= old_error& (DC_ERROR|AC_ERROR|MV_ERROR); + s->error_status_table[mb_xy]|= error; + } + } +#if 1 + /* handle not partitioned case */ + if(!s->partitioned_frame){ + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + error= s->error_status_table[mb_xy]; + if(error&(AC_ERROR|DC_ERROR|MV_ERROR)) + error|= AC_ERROR|DC_ERROR|MV_ERROR; + s->error_status_table[mb_xy]= error; + } + } +#endif + + dc_error= ac_error= mv_error=0; + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + error= s->error_status_table[mb_xy]; + if(error&DC_ERROR) dc_error ++; + if(error&AC_ERROR) ac_error ++; + if(error&MV_ERROR) mv_error ++; + } + av_log(s->avctx, AV_LOG_INFO, "concealing %d DC, %d AC, %d MV errors\n", dc_error, ac_error, mv_error); + + is_intra_likely= is_intra_more_likely(s); + + /* set unknown mb-type to most likely */ + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + error= s->error_status_table[mb_xy]; + if(!((error&DC_ERROR) && (error&MV_ERROR))) + continue; + + if(is_intra_likely) + s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA4x4; + else + 
s->current_picture.mb_type[mb_xy]= MB_TYPE_16x16 | MB_TYPE_L0; + } + + /* handle inter blocks with damaged AC */ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + const int mb_xy= mb_x + mb_y * s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + error= s->error_status_table[mb_xy]; + + if(IS_INTRA(mb_type)) continue; //intra + if(error&MV_ERROR) continue; //inter with damaged MV + if(!(error&AC_ERROR)) continue; //undamaged inter + + s->mv_dir = MV_DIR_FORWARD; + s->mb_intra=0; + s->mb_skipped=0; + if(IS_8X8(mb_type)){ + int mb_index= mb_x*2 + mb_y*2*s->b8_stride; + int j; + s->mv_type = MV_TYPE_8X8; + for(j=0; j<4; j++){ + s->mv[0][j][0] = s->current_picture.motion_val[0][ mb_index + (j&1) + (j>>1)*s->b8_stride ][0]; + s->mv[0][j][1] = s->current_picture.motion_val[0][ mb_index + (j&1) + (j>>1)*s->b8_stride ][1]; + } + }else{ + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][0]; + s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1]; + } + + s->dsp.clear_blocks(s->block[0]); + + s->mb_x= mb_x; + s->mb_y= mb_y; + decode_mb(s); + } + } + + /* guess MVs */ + if(s->pict_type==B_TYPE){ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int xy= mb_x*2 + mb_y*2*s->b8_stride; + const int mb_xy= mb_x + mb_y * s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + error= s->error_status_table[mb_xy]; + + if(IS_INTRA(mb_type)) continue; + if(!(error&MV_ERROR)) continue; //inter with undamaged MV + if(!(error&AC_ERROR)) continue; //undamaged inter + + s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD; + s->mb_intra=0; + s->mv_type = MV_TYPE_16X16; + s->mb_skipped=0; + + if(s->pp_time){ + int time_pp= s->pp_time; + int time_pb= s->pb_time; + + s->mv[0][0][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp; + s->mv[0][0][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp; + s->mv[1][0][0] 
= s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp; + s->mv[1][0][1] = s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp; + }else{ + s->mv[0][0][0]= 0; + s->mv[0][0][1]= 0; + s->mv[1][0][0]= 0; + s->mv[1][0][1]= 0; + } + + s->dsp.clear_blocks(s->block[0]); + s->mb_x= mb_x; + s->mb_y= mb_y; + decode_mb(s); + } + } + }else + guess_mv(s); + +#ifdef HAVE_XVMC + /* the filters below are not XvMC compatible, skip them */ + if(s->avctx->xvmc_acceleration) goto ec_clean; +#endif + /* fill DC for inter blocks */ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + int dc, dcu, dcv, y, n; + int16_t *dc_ptr; + uint8_t *dest_y, *dest_cb, *dest_cr; + const int mb_xy= mb_x + mb_y * s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + + error= s->error_status_table[mb_xy]; + + if(IS_INTRA(mb_type) && s->partitioned_frame) continue; +// if(error&MV_ERROR) continue; //inter data damaged FIXME is this good? + + dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; + dest_cb= s->current_picture.data[1] + mb_x*8 + mb_y*8 *s->uvlinesize; + dest_cr= s->current_picture.data[2] + mb_x*8 + mb_y*8 *s->uvlinesize; + + dc_ptr= &s->dc_val[0][mb_x*2 + mb_y*2*s->b8_stride]; + for(n=0; n<4; n++){ + dc=0; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dc+= dest_y[x + (n&1)*8 + (y + (n>>1)*8)*s->linesize]; + } + } + dc_ptr[(n&1) + (n>>1)*s->b8_stride]= (dc+4)>>3; + } + + dcu=dcv=0; + for(y=0; y<8; y++){ + int x; + for(x=0; x<8; x++){ + dcu+=dest_cb[x + y*(s->uvlinesize)]; + dcv+=dest_cr[x + y*(s->uvlinesize)]; + } + } + s->dc_val[1][mb_x + mb_y*s->mb_stride]= (dcu+4)>>3; + s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3; + } + } +#if 1 + /* guess DC for damaged blocks */ + guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1); + guess_dc(s, s->dc_val[1], s->mb_width , s->mb_height , s->mb_stride, 0); + guess_dc(s, s->dc_val[2], s->mb_width , s->mb_height , s->mb_stride, 0); 
+#endif + /* filter luma DC */ + filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride); + +#if 1 + /* render DC only intra */ + for(mb_y=0; mb_ymb_height; mb_y++){ + for(mb_x=0; mb_xmb_width; mb_x++){ + uint8_t *dest_y, *dest_cb, *dest_cr; + const int mb_xy= mb_x + mb_y * s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + + error= s->error_status_table[mb_xy]; + + if(IS_INTER(mb_type)) continue; + if(!(error&AC_ERROR)) continue; //undamaged + + dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize; + dest_cb= s->current_picture.data[1] + mb_x*8 + mb_y*8 *s->uvlinesize; + dest_cr= s->current_picture.data[2] + mb_x*8 + mb_y*8 *s->uvlinesize; + + put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y); + } + } +#endif + + if(s->avctx->error_concealment&FF_EC_DEBLOCK){ + /* filter horizontal block boundaries */ + h_block_filter(s, s->current_picture.data[0], s->mb_width*2, s->mb_height*2, s->linesize , 1); + h_block_filter(s, s->current_picture.data[1], s->mb_width , s->mb_height , s->uvlinesize, 0); + h_block_filter(s, s->current_picture.data[2], s->mb_width , s->mb_height , s->uvlinesize, 0); + + /* filter vertical block boundaries */ + v_block_filter(s, s->current_picture.data[0], s->mb_width*2, s->mb_height*2, s->linesize , 1); + v_block_filter(s, s->current_picture.data[1], s->mb_width , s->mb_height , s->uvlinesize, 0); + v_block_filter(s, s->current_picture.data[2], s->mb_width , s->mb_height , s->uvlinesize, 0); + } + +#ifdef HAVE_XVMC +ec_clean: +#endif + /* clean a few tables */ + for(i=0; imb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + int error= s->error_status_table[mb_xy]; + + if(s->pict_type!=B_TYPE && (error&(DC_ERROR|MV_ERROR|AC_ERROR))){ + s->mbskip_table[mb_xy]=0; + } + s->mbintra_table[mb_xy]=1; + } +} diff --git a/mpeg4/src/libavcodec/eval.c b/mpeg4/src/libavcodec/eval.c new file mode 100644 index 0000000000000000000000000000000000000000..5b0e51d627e248de3aafa5aa534ab183ceea65d2 --- /dev/null +++ 
b/mpeg4/src/libavcodec/eval.c @@ -0,0 +1,226 @@ +/* + * simple arithmetic expression evaluator + * + * Copyright (c) 2002 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file eval.c + * simple arithmetic expression evaluator. + * + * see http://joe.hotchkiss.com/programming/eval/eval.html + */ + +#include "avcodec.h" +#include "mpegvideo.h" + +#include +#include +#include +#include + +#ifndef NAN + #define NAN 0 +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +typedef struct Parser{ + int stack_index; + char *s; + double *const_value; + const char **const_name; // NULL terminated + double (**func1)(void *, double a); // NULL terminated + const char **func1_name; // NULL terminated + double (**func2)(void *, double a, double b); // NULL terminated + char **func2_name; // NULL terminated + void *opaque; +} Parser; + +static double evalExpression(Parser *p); + +static int strmatch(const char *s, const char *prefix){ + int i; + for(i=0; prefix[i]; i++){ + if(prefix[i] != s[i]) return 0; + } + return 1; +} + +static double evalPrimary(Parser *p){ + double d, d2=NAN; + char *next= p->s; + int i; + + /* number */ + d= strtod(p->s, &next); + if(next != p->s){ + p->s= next; + return d; + } + + /* named constants */ + 
for(i=0; p->const_name && p->const_name[i]; i++){ + if(strmatch(p->s, p->const_name[i])){ + p->s+= strlen(p->const_name[i]); + return p->const_value[i]; + } + } + + p->s= strchr(p->s, '('); + if(p->s==NULL){ + av_log(NULL, AV_LOG_ERROR, "Parser: missing ( in \"%s\"\n", next); + return NAN; + } + p->s++; // "(" + d= evalExpression(p); + if(p->s[0]== ','){ + p->s++; // "," + d2= evalExpression(p); + } + if(p->s[0] != ')'){ + av_log(NULL, AV_LOG_ERROR, "Parser: missing ) in \"%s\"\n", next); + return NAN; + } + p->s++; // ")" + + if( strmatch(next, "sinh" ) ) d= sinh(d); + else if( strmatch(next, "cosh" ) ) d= cosh(d); + else if( strmatch(next, "tanh" ) ) d= tanh(d); + else if( strmatch(next, "sin" ) ) d= sin(d); + else if( strmatch(next, "cos" ) ) d= cos(d); + else if( strmatch(next, "tan" ) ) d= tan(d); + else if( strmatch(next, "exp" ) ) d= exp(d); + else if( strmatch(next, "log" ) ) d= log(d); + else if( strmatch(next, "squish") ) d= 1/(1+exp(4*d)); + else if( strmatch(next, "gauss" ) ) d= exp(-d*d/2)/sqrt(2*M_PI); + else if( strmatch(next, "abs" ) ) d= fabs(d); + else if( strmatch(next, "max" ) ) d= d > d2 ? d : d2; + else if( strmatch(next, "min" ) ) d= d < d2 ? d : d2; + else if( strmatch(next, "gt" ) ) d= d > d2 ? 1.0 : 0.0; + else if( strmatch(next, "gte" ) ) d= d >= d2 ? 1.0 : 0.0; + else if( strmatch(next, "lt" ) ) d= d > d2 ? 0.0 : 1.0; + else if( strmatch(next, "lte" ) ) d= d >= d2 ? 0.0 : 1.0; + else if( strmatch(next, "eq" ) ) d= d == d2 ? 1.0 : 0.0; + else if( strmatch(next, "(" ) ) d= d; +// else if( strmatch(next, "l1" ) ) d= 1 + d2*(d - 1); +// else if( strmatch(next, "sq01" ) ) d= (d >= 0.0 && d <=1.0) ? 
1.0 : 0.0; + else{ + for(i=0; p->func1_name && p->func1_name[i]; i++){ + if(strmatch(next, p->func1_name[i])){ + return p->func1[i](p->opaque, d); + } + } + + for(i=0; p->func2_name && p->func2_name[i]; i++){ + if(strmatch(next, p->func2_name[i])){ + return p->func2[i](p->opaque, d, d2); + } + } + + av_log(NULL, AV_LOG_ERROR, "Parser: unknown function in \"%s\"\n", next); + return NAN; + } + + return d; +} + +static double evalPow(Parser *p){ + int sign= (*p->s == '+') - (*p->s == '-'); + p->s += sign&1; + return (sign|1) * evalPrimary(p); +} + +static double evalFactor(Parser *p){ + double ret= evalPow(p); + while(p->s[0]=='^'){ + p->s++; + ret= pow(ret, evalPow(p)); + } + return ret; +} + +static double evalTerm(Parser *p){ + double ret= evalFactor(p); + while(p->s[0]=='*' || p->s[0]=='/'){ + if(*p->s++ == '*') ret*= evalFactor(p); + else ret/= evalFactor(p); + } + return ret; +} + +static double evalExpression(Parser *p){ + double ret= 0; + + if(p->stack_index <= 0) //protect against stack overflows + return NAN; + p->stack_index--; + + do{ + ret += evalTerm(p); + }while(*p->s == '+' || *p->s == '-'); + + p->stack_index++; + + return ret; +} + +double ff_eval(char *s, double *const_value, const char **const_name, + double (**func1)(void *, double), const char **func1_name, + double (**func2)(void *, double, double), char **func2_name, + void *opaque){ + Parser p; + + p.stack_index=100; + p.s= s; + p.const_value= const_value; + p.const_name = const_name; + p.func1 = func1; + p.func1_name = func1_name; + p.func2 = func2; + p.func2_name = func2_name; + p.opaque = opaque; + + return evalExpression(&p); +} + +#ifdef TEST +#undef printf +static double const_values[]={ + M_PI, + M_E, + 0 +}; +static const char *const_names[]={ + "PI", + "E", + 0 +}; +main(){ + int i; + printf("%f == 12.7\n", ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL)); + + for(i=0; i<1050; i++){ + START_TIMER + 
ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL); + STOP_TIMER("ff_eval") + } +} +#endif diff --git a/mpeg4/src/libavcodec/faac.c b/mpeg4/src/libavcodec/faac.c new file mode 100644 index 0000000000000000000000000000000000000000..2b7c5967848e1bad1050c47ea36559868d633ac0 --- /dev/null +++ b/mpeg4/src/libavcodec/faac.c @@ -0,0 +1,131 @@ +/* + * Interface to libfaac for aac encoding + * Copyright (c) 2002 Gildas Bazin + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file faacaudio.c + * Interface to libfaac for aac encoding. 
+ */ + +#include "avcodec.h" +#include + +typedef struct FaacAudioContext { + faacEncHandle faac_handle; +} FaacAudioContext; + +static int Faac_encode_init(AVCodecContext *avctx) +{ + FaacAudioContext *s = avctx->priv_data; + faacEncConfigurationPtr faac_cfg; + unsigned long samples_input, max_bytes_output; + + /* number of channels */ + if (avctx->channels < 1 || avctx->channels > 6) + return -1; + + s->faac_handle = faacEncOpen(avctx->sample_rate, + avctx->channels, + &samples_input, &max_bytes_output); + + /* check faac version */ + faac_cfg = faacEncGetCurrentConfiguration(s->faac_handle); + if (faac_cfg->version != FAAC_CFG_VERSION) { + av_log(avctx, AV_LOG_ERROR, "wrong libfaac version (compiled for: %d, using %d)\n", FAAC_CFG_VERSION, faac_cfg->version); + faacEncClose(s->faac_handle); + return -1; + } + + /* put the options in the configuration struct */ + faac_cfg->aacObjectType = LOW; + faac_cfg->mpegVersion = MPEG4; + faac_cfg->useTns = 0; + faac_cfg->allowMidside = 1; + faac_cfg->bitRate = avctx->bit_rate / avctx->channels; + faac_cfg->bandWidth = avctx->cutoff; + if(avctx->flags & CODEC_FLAG_QSCALE) { + faac_cfg->bitRate = 0; + faac_cfg->quantqual = avctx->global_quality / FF_QP2LAMBDA; + } + faac_cfg->outputFormat = 1; + faac_cfg->inputFormat = FAAC_INPUT_16BIT; + + avctx->frame_size = samples_input / avctx->channels; + + avctx->coded_frame= avcodec_alloc_frame(); + avctx->coded_frame->key_frame= 1; + + /* Set decoder specific info */ + avctx->extradata_size = 0; + if (avctx->flags & CODEC_FLAG_GLOBAL_HEADER) { + + unsigned char *buffer; + unsigned long decoder_specific_info_size; + + if (!faacEncGetDecoderSpecificInfo(s->faac_handle, &buffer, + &decoder_specific_info_size)) { + avctx->extradata = buffer; + avctx->extradata_size = decoder_specific_info_size; + faac_cfg->outputFormat = 0; + } + } + + if (!faacEncSetConfiguration(s->faac_handle, faac_cfg)) { + av_log(avctx, AV_LOG_ERROR, "libfaac doesn't support this output format!\n"); + return -1; + 
} + + return 0; +} + +int Faac_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + FaacAudioContext *s = avctx->priv_data; + int bytes_written; + + bytes_written = faacEncEncode(s->faac_handle, + data, + avctx->frame_size * avctx->channels, + frame, + buf_size); + + return bytes_written; +} + +int Faac_encode_close(AVCodecContext *avctx) +{ + FaacAudioContext *s = avctx->priv_data; + + av_freep(&avctx->coded_frame); + + //if (avctx->extradata_size) free(avctx->extradata); + + faacEncClose(s->faac_handle); + return 0; +} + +AVCodec faac_encoder = { + "aac", + CODEC_TYPE_AUDIO, + CODEC_ID_AAC, + sizeof(FaacAudioContext), + Faac_encode_init, + Faac_encode_frame, + Faac_encode_close +}; diff --git a/mpeg4/src/libavcodec/faad.c b/mpeg4/src/libavcodec/faad.c new file mode 100644 index 0000000000000000000000000000000000000000..49cc789a51ce529fb158148c9c20c7aeb9f79c89 --- /dev/null +++ b/mpeg4/src/libavcodec/faad.c @@ -0,0 +1,330 @@ +/* + * Faad decoder + * Copyright (c) 2003 Zdenek Kabelac. + * Copyright (c) 2004 Thomas Raivio. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file faad.c + * AAC decoder. 
+ * + * still a bit unfinished - but it plays something + */ + +#include "avcodec.h" +#include "faad.h" + +#ifndef FAADAPI +#define FAADAPI +#endif + +/* + * when CONFIG_FAADBIN is defined the libfaad will be opened at runtime + */ +//#undef CONFIG_FAADBIN +//#define CONFIG_FAADBIN + +#ifdef CONFIG_FAADBIN +#include +static const char* libfaadname = "libfaad.so.0"; +#else +#define dlopen(a) +#define dlclose(a) +#endif + +typedef struct { + void* handle; /* dlopen handle */ + void* faac_handle; /* FAAD library handle */ + int sample_size; + int init; + + /* faad calls */ + faacDecHandle FAADAPI (*faacDecOpen)(void); + faacDecConfigurationPtr FAADAPI (*faacDecGetCurrentConfiguration)(faacDecHandle hDecoder); +#ifndef FAAD2_VERSION + int FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, + faacDecConfigurationPtr config); + int FAADAPI (*faacDecInit)(faacDecHandle hDecoder, + unsigned char *buffer, + unsigned long *samplerate, + unsigned long *channels); + int FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, + unsigned long SizeOfDecoderSpecificInfo, + unsigned long *samplerate, unsigned long *channels); + int FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, + unsigned char *buffer, + unsigned long *bytesconsumed, + short *sample_buffer, + unsigned long *samples); +#else + unsigned char FAADAPI (*faacDecSetConfiguration)(faacDecHandle hDecoder, + faacDecConfigurationPtr config); + long FAADAPI (*faacDecInit)(faacDecHandle hDecoder, + unsigned char *buffer, + unsigned long buffer_size, + unsigned long *samplerate, + unsigned char *channels); + char FAADAPI (*faacDecInit2)(faacDecHandle hDecoder, unsigned char *pBuffer, + unsigned long SizeOfDecoderSpecificInfo, + unsigned long *samplerate, unsigned char *channels); + void *FAADAPI (*faacDecDecode)(faacDecHandle hDecoder, + faacDecFrameInfo *hInfo, + unsigned char *buffer, + unsigned long buffer_size); + char* FAADAPI (*faacDecGetErrorMessage)(unsigned char errcode); +#endif + + void 
FAADAPI (*faacDecClose)(faacDecHandle hDecoder); + + +} FAACContext; + +static const unsigned long faac_srates[] = +{ + 96000, 88200, 64000, 48000, 44100, 32000, + 24000, 22050, 16000, 12000, 11025, 8000 +}; + +static int faac_init_mp4(AVCodecContext *avctx) +{ + FAACContext *s = (FAACContext *) avctx->priv_data; + unsigned long samplerate; +#ifndef FAAD2_VERSION + unsigned long channels; +#else + unsigned char channels; +#endif + int r = 0; + + if (avctx->extradata){ + r = s->faacDecInit2(s->faac_handle, (uint8_t*) avctx->extradata, + avctx->extradata_size, + &samplerate, &channels); + if (r < 0){ + av_log(avctx, AV_LOG_ERROR, + "faacDecInit2 failed r:%d sr:%ld ch:%ld s:%d\n", + r, samplerate, (long)channels, avctx->extradata_size); + } else { + avctx->sample_rate = samplerate; + avctx->channels = channels; + s->init = 1; + } + } + + return r; +} + +static int faac_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + FAACContext *s = (FAACContext *) avctx->priv_data; +#ifndef FAAD2_VERSION + unsigned long bytesconsumed; + short *sample_buffer = NULL; + unsigned long samples; + int out; +#else + faacDecFrameInfo frame_info; + void *out; +#endif + if(buf_size == 0) + return 0; +#ifndef FAAD2_VERSION + out = s->faacDecDecode(s->faac_handle, + (unsigned char*)buf, + &bytesconsumed, + data, + &samples); + samples *= s->sample_size; + if (data_size) + *data_size = samples; + return (buf_size < (int)bytesconsumed) + ? 
buf_size : (int)bytesconsumed; +#else + + if(!s->init){ + unsigned long srate; + unsigned char channels; + int r = s->faacDecInit(s->faac_handle, buf, buf_size, &srate, &channels); + if(r < 0){ + av_log(avctx, AV_LOG_ERROR, "faac: codec init failed: %s\n", + s->faacDecGetErrorMessage(frame_info.error)); + return -1; + } + avctx->sample_rate = srate; + avctx->channels = channels; + s->init = 1; + } + + out = s->faacDecDecode(s->faac_handle, &frame_info, (unsigned char*)buf, (unsigned long)buf_size); + + if (frame_info.error > 0) { + av_log(avctx, AV_LOG_ERROR, "faac: frame decoding failed: %s\n", + s->faacDecGetErrorMessage(frame_info.error)); + return -1; + } + + frame_info.samples *= s->sample_size; + memcpy(data, out, frame_info.samples); // CHECKME - can we cheat this one + + if (data_size) + *data_size = frame_info.samples; + + return (buf_size < (int)frame_info.bytesconsumed) + ? buf_size : (int)frame_info.bytesconsumed; +#endif +} + +static int faac_decode_end(AVCodecContext *avctx) +{ + FAACContext *s = (FAACContext *) avctx->priv_data; + + if (s->faacDecClose) + s->faacDecClose(s->faac_handle); + + dlclose(s->handle); + return 0; +} + +static int faac_decode_init(AVCodecContext *avctx) +{ + FAACContext *s = (FAACContext *) avctx->priv_data; + faacDecConfigurationPtr faac_cfg; + +#ifdef CONFIG_FAADBIN + const char* err = 0; + + s->handle = dlopen(libfaadname, RTLD_LAZY); + if (!s->handle) + { + av_log(avctx, AV_LOG_ERROR, "FAAD library: %s could not be opened! 
\n%s\n", + libfaadname, dlerror()); + return -1; + } +#define dfaac(a, b) \ + do { static const char* n = "faacDec" #a; \ + if ((s->faacDec ## a = b dlsym( s->handle, n )) == NULL) { err = n; break; } } while(0) + for(;;) { +#else /* !CONFIG_FAADBIN */ +#define dfaac(a, b) s->faacDec ## a = faacDec ## a +#endif /* CONFIG_FAADBIN */ + + // resolve all needed function calls + dfaac(Open, (faacDecHandle FAADAPI (*)(void))); + dfaac(GetCurrentConfiguration, (faacDecConfigurationPtr + FAADAPI (*)(faacDecHandle))); +#ifndef FAAD2_VERSION + dfaac(SetConfiguration, (int FAADAPI (*)(faacDecHandle, + faacDecConfigurationPtr))); + + dfaac(Init, (int FAADAPI (*)(faacDecHandle, unsigned char*, + unsigned long*, unsigned long*))); + dfaac(Init2, (int FAADAPI (*)(faacDecHandle, unsigned char*, + unsigned long, unsigned long*, + unsigned long*))); + dfaac(Close, (void FAADAPI (*)(faacDecHandle hDecoder))); + dfaac(Decode, (int FAADAPI (*)(faacDecHandle, unsigned char*, + unsigned long*, short*, unsigned long*))); +#else + dfaac(SetConfiguration, (unsigned char FAADAPI (*)(faacDecHandle, + faacDecConfigurationPtr))); + dfaac(Init, (long FAADAPI (*)(faacDecHandle, unsigned char*, + unsigned long, unsigned long*, unsigned char*))); + dfaac(Init2, (char FAADAPI (*)(faacDecHandle, unsigned char*, + unsigned long, unsigned long*, + unsigned char*))); + dfaac(Decode, (void *FAADAPI (*)(faacDecHandle, faacDecFrameInfo*, + unsigned char*, unsigned long))); + dfaac(GetErrorMessage, (char* FAADAPI (*)(unsigned char))); +#endif +#undef dfacc + +#ifdef CONFIG_FAADBIN + break; + } + if (err) { + dlclose(s->handle); + av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot resolve %s in %s!\n", + err, libfaadname); + return -1; + } +#endif + + s->faac_handle = s->faacDecOpen(); + if (!s->faac_handle) { + av_log(avctx, AV_LOG_ERROR, "FAAD library: cannot create handler!\n"); + faac_decode_end(avctx); + return -1; + } + + + faac_cfg = s->faacDecGetCurrentConfiguration(s->faac_handle); + + if (faac_cfg) 
{ + switch (avctx->bits_per_sample) { + case 8: av_log(avctx, AV_LOG_ERROR, "FAADlib unsupported bps %d\n", avctx->bits_per_sample); break; + default: + case 16: +#ifdef FAAD2_VERSION + faac_cfg->outputFormat = FAAD_FMT_16BIT; +#endif + s->sample_size = 2; + break; + case 24: +#ifdef FAAD2_VERSION + faac_cfg->outputFormat = FAAD_FMT_24BIT; +#endif + s->sample_size = 3; + break; + case 32: +#ifdef FAAD2_VERSION + faac_cfg->outputFormat = FAAD_FMT_32BIT; +#endif + s->sample_size = 4; + break; + } + + faac_cfg->defSampleRate = (!avctx->sample_rate) ? 44100 : avctx->sample_rate; + faac_cfg->defObjectType = LC; + } + + s->faacDecSetConfiguration(s->faac_handle, faac_cfg); + + faac_init_mp4(avctx); + + return 0; +} + +#define AAC_CODEC(id, name) \ +AVCodec name ## _decoder = { \ + #name, \ + CODEC_TYPE_AUDIO, \ + id, \ + sizeof(FAACContext), \ + faac_decode_init, \ + NULL, \ + faac_decode_end, \ + faac_decode_frame, \ +} + +// FIXME - raw AAC files - maybe just one entry will be enough +AAC_CODEC(CODEC_ID_AAC, aac); +// If it's mp4 file - usually embeded into Qt Mov +AAC_CODEC(CODEC_ID_MPEG4AAC, mpeg4aac); + +#undef AAC_CODEC diff --git a/mpeg4/src/libavcodec/faandct.c b/mpeg4/src/libavcodec/faandct.c new file mode 100644 index 0000000000000000000000000000000000000000..cd7ef7c6b26673d0dc5c967686982a54c735c798 --- /dev/null +++ b/mpeg4/src/libavcodec/faandct.c @@ -0,0 +1,218 @@ +/* + * Floating point AAN DCT + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c) + */ + +/** + * @file faandct.c + * @brief + * Floating point AAN DCT + * @author Michael Niedermayer + */ + +#include "dsputil.h" +#include "faandct.h" + +#define FLOAT float +#ifdef FAAN_POSTSCALE +# define SCALE(x) postscale[x] +#else +# define SCALE(x) 1 +#endif + +//numbers generated by simple c code (not as accurate as they could be) +/* +for(i=0; i<8; i++){ + printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(i*acosl(-1.0)/(long double)16.0)*sqrtl(2))); +} +*/ +#define B0 1.00000000000000000000 +#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1 +#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1 +#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1 +#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1 +#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1 +#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1 +#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1 + + +#define A1 0.70710678118654752438 // cos(pi*4/16) +#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2) +#define A5 0.38268343236508977170 // cos(pi*6/16) +#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2) + +static FLOAT postscale[64]={ +B0*B0, B0*B1, B0*B2, B0*B3, B0*B4, B0*B5, B0*B6, B0*B7, +B1*B0, B1*B1, B1*B2, B1*B3, B1*B4, B1*B5, B1*B6, B1*B7, +B2*B0, B2*B1, B2*B2, B2*B3, B2*B4, B2*B5, B2*B6, B2*B7, +B3*B0, B3*B1, B3*B2, B3*B3, B3*B4, B3*B5, B3*B6, B3*B7, +B4*B0, B4*B1, B4*B2, B4*B3, B4*B4, B4*B5, B4*B6, B4*B7, +B5*B0, B5*B1, B5*B2, B5*B3, B5*B4, B5*B5, B5*B6, B5*B7, +B6*B0, B6*B1, B6*B2, B6*B3, B6*B4, B6*B5, B6*B6, B6*B7, +B7*B0, B7*B1, B7*B2, B7*B3, B7*B4, B7*B5, B7*B6, B7*B7, +}; + +static 
always_inline void row_fdct(FLOAT temp[64], DCTELEM * data) +{ + FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + FLOAT tmp10, tmp11, tmp12, tmp13; + FLOAT z1, z2, z3, z4, z5, z11, z13; + int i; + + for (i=0; i<8*8; i+=8) { + tmp0= data[0 + i] + data[7 + i]; + tmp7= data[0 + i] - data[7 + i]; + tmp1= data[1 + i] + data[6 + i]; + tmp6= data[1 + i] - data[6 + i]; + tmp2= data[2 + i] + data[5 + i]; + tmp5= data[2 + i] - data[5 + i]; + tmp3= data[3 + i] + data[4 + i]; + tmp4= data[3 + i] - data[4 + i]; + + tmp10= tmp0 + tmp3; + tmp13= tmp0 - tmp3; + tmp11= tmp1 + tmp2; + tmp12= tmp1 - tmp2; + + temp[0 + i]= tmp10 + tmp11; + temp[4 + i]= tmp10 - tmp11; + + z1= (tmp12 + tmp13)*A1; + temp[2 + i]= tmp13 + z1; + temp[6 + i]= tmp13 - z1; + + tmp10= tmp4 + tmp5; + tmp11= tmp5 + tmp6; + tmp12= tmp6 + tmp7; + + z5= (tmp10 - tmp12) * A5; + z2= tmp10*A2 + z5; + z4= tmp12*A4 + z5; + z3= tmp11*A1; + + z11= tmp7 + z3; + z13= tmp7 - z3; + + temp[5 + i]= z13 + z2; + temp[3 + i]= z13 - z2; + temp[1 + i]= z11 + z4; + temp[7 + i]= z11 - z4; + } +} + +void ff_faandct(DCTELEM * data) +{ + FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + FLOAT tmp10, tmp11, tmp12, tmp13; + FLOAT z1, z2, z3, z4, z5, z11, z13; + FLOAT temp[64]; + int i; + + emms_c(); + + row_fdct(temp, data); + + for (i=0; i<8; i++) { + tmp0= temp[8*0 + i] + temp[8*7 + i]; + tmp7= temp[8*0 + i] - temp[8*7 + i]; + tmp1= temp[8*1 + i] + temp[8*6 + i]; + tmp6= temp[8*1 + i] - temp[8*6 + i]; + tmp2= temp[8*2 + i] + temp[8*5 + i]; + tmp5= temp[8*2 + i] - temp[8*5 + i]; + tmp3= temp[8*3 + i] + temp[8*4 + i]; + tmp4= temp[8*3 + i] - temp[8*4 + i]; + + tmp10= tmp0 + tmp3; + tmp13= tmp0 - tmp3; + tmp11= tmp1 + tmp2; + tmp12= tmp1 - tmp2; + + data[8*0 + i]= lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); + data[8*4 + i]= lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); + + z1= (tmp12 + tmp13)* A1; + data[8*2 + i]= lrintf(SCALE(8*2 + i) * (tmp13 + z1)); + data[8*6 + i]= lrintf(SCALE(8*6 + i) * (tmp13 - z1)); + + tmp10= tmp4 + tmp5; + 
tmp11= tmp5 + tmp6; + tmp12= tmp6 + tmp7; + + z5= (tmp10 - tmp12) * A5; + z2= tmp10*A2 + z5; + z4= tmp12*A4 + z5; + z3= tmp11*A1; + + z11= tmp7 + z3; + z13= tmp7 - z3; + + data[8*5 + i]= lrintf(SCALE(8*5 + i) * (z13 + z2)); + data[8*3 + i]= lrintf(SCALE(8*3 + i) * (z13 - z2)); + data[8*1 + i]= lrintf(SCALE(8*1 + i) * (z11 + z4)); + data[8*7 + i]= lrintf(SCALE(8*7 + i) * (z11 - z4)); + } +} + +void ff_faandct248(DCTELEM * data) +{ + FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + FLOAT tmp10, tmp11, tmp12, tmp13; + FLOAT z1; + FLOAT temp[64]; + int i; + + emms_c(); + + row_fdct(temp, data); + + for (i=0; i<8; i++) { + tmp0 = temp[8*0 + i] + temp[8*1 + i]; + tmp1 = temp[8*2 + i] + temp[8*3 + i]; + tmp2 = temp[8*4 + i] + temp[8*5 + i]; + tmp3 = temp[8*6 + i] + temp[8*7 + i]; + tmp4 = temp[8*0 + i] - temp[8*1 + i]; + tmp5 = temp[8*2 + i] - temp[8*3 + i]; + tmp6 = temp[8*4 + i] - temp[8*5 + i]; + tmp7 = temp[8*6 + i] - temp[8*7 + i]; + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + tmp13 = tmp0 - tmp3; + + data[8*0 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); + data[8*4 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); + + z1 = (tmp12 + tmp13)* A1; + data[8*2 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); + data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); + + tmp10 = tmp4 + tmp7; + tmp11 = tmp5 + tmp6; + tmp12 = tmp5 - tmp6; + tmp13 = tmp4 - tmp7; + + data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11)); + data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11)); + + z1 = (tmp12 + tmp13)* A1; + data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1)); + data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1)); + } +} diff --git a/mpeg4/src/libavcodec/faandct.h b/mpeg4/src/libavcodec/faandct.h new file mode 100644 index 0000000000000000000000000000000000000000..677594c04c84839021984d800340426fa552ab61 --- /dev/null +++ b/mpeg4/src/libavcodec/faandct.h @@ -0,0 +1,31 @@ +/* + * Floating point AAN DCT + * Copyright (c) 2003 Michael 
Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file faandct.h + * @brief + * Floating point AAN DCT + * @author Michael Niedermayer + */ + +#define FAAN_POSTSCALE + +void ff_faandct(DCTELEM * data); +void ff_faandct248(DCTELEM * data); diff --git a/mpeg4/src/libavcodec/fdctref.c b/mpeg4/src/libavcodec/fdctref.c new file mode 100644 index 0000000000000000000000000000000000000000..5eff36849132c264671e19b94a64753b39839483 --- /dev/null +++ b/mpeg4/src/libavcodec/fdctref.c @@ -0,0 +1,158 @@ +/** + * @file fdctref.c + * forward discrete cosine transform, double precision. + */ + +/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ + +/* + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. 
+ * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + +#include + +#ifndef PI +# ifdef M_PI +# define PI M_PI +# else +# define PI 3.14159265358979323846 +# endif +#endif + +/* global declarations */ +void init_fdct (void); +void fdct (short *block); + +/* private data */ +static double c[8][8]; /* transform coefficients */ + +void init_fdct() +{ + int i, j; + double s; + + for (i=0; i<8; i++) + { + s = (i==0) ? sqrt(0.125) : 0.5; + + for (j=0; j<8; j++) + c[i][j] = s * cos((PI/8.0)*i*(j+0.5)); + } +} + +void fdct(block) +short *block; +{ + register int i, j; + double s; + double tmp[64]; + + for(i = 0; i < 8; i++) + for(j = 0; j < 8; j++) + { + s = 0.0; + +/* + * for(k = 0; k < 8; k++) + * s += c[j][k] * block[8 * i + k]; + */ + s += c[j][0] * block[8 * i + 0]; + s += c[j][1] * block[8 * i + 1]; + s += c[j][2] * block[8 * i + 2]; + s += c[j][3] * block[8 * i + 3]; + s += c[j][4] * block[8 * i + 4]; + s += c[j][5] * block[8 * i + 5]; + s += c[j][6] * block[8 * i + 6]; + s += c[j][7] * block[8 * i + 7]; + + tmp[8 * i + j] = s; + } + + for(j = 0; j < 8; j++) + for(i = 0; i < 8; i++) + { + s = 0.0; + +/* + * for(k = 0; k < 8; k++) + * s += c[i][k] * tmp[8 * k + j]; + */ + s += c[i][0] * tmp[8 * 0 + j]; + s += c[i][1] * tmp[8 * 1 + j]; + s += c[i][2] * tmp[8 * 2 + j]; + s += c[i][3] * tmp[8 * 3 + j]; + s += c[i][4] * tmp[8 * 4 + j]; + s += c[i][5] * tmp[8 * 5 + j]; + s += c[i][6] * tmp[8 * 6 + j]; + s += c[i][7] * tmp[8 * 7 + j]; + s*=8.0; + 
+ block[8 * i + j] = (short)floor(s + 0.499999); +/* + * reason for adding 0.499999 instead of 0.5: + * s is quite often x.5 (at least for i and/or j = 0 or 4) + * and setting the rounding threshold exactly to 0.5 leads to an + * extremely high arithmetic implementation dependency of the result; + * s being between x.5 and x.500001 (which is now incorrectly rounded + * downwards instead of upwards) is assumed to occur less often + * (if at all) + */ + } +} + +/* perform IDCT matrix multiply for 8x8 coefficient block */ + +void idct(block) +short *block; +{ + int i, j, k, v; + double partial_product; + double tmp[64]; + + for (i=0; i<8; i++) + for (j=0; j<8; j++) + { + partial_product = 0.0; + + for (k=0; k<8; k++) + partial_product+= c[k][j]*block[8*i+k]; + + tmp[8*i+j] = partial_product; + } + + /* Transpose operation is integrated into address mapping by switching + loop order of i and j */ + + for (j=0; j<8; j++) + for (i=0; i<8; i++) + { + partial_product = 0.0; + + for (k=0; k<8; k++) + partial_product+= c[k][i]*tmp[8*k+j]; + + v = (int) floor(partial_product+0.5); + block[8*i+j] = v; + } +} diff --git a/mpeg4/src/libavcodec/fft-test.c b/mpeg4/src/libavcodec/fft-test.c new file mode 100644 index 0000000000000000000000000000000000000000..f924dcadfdffc2d85dcf17c1949a584dd39f7cb6 --- /dev/null +++ b/mpeg4/src/libavcodec/fft-test.c @@ -0,0 +1,277 @@ +/** + * @file fft-test.c + * FFT and MDCT tests. 
+ */ + +#include "dsputil.h" +#include +#include +#include + +int mm_flags; + +/* reference fft */ + +#define MUL16(a,b) ((a) * (b)) + +#define CMAC(pre, pim, are, aim, bre, bim) \ +{\ + pre += (MUL16(are, bre) - MUL16(aim, bim));\ + pim += (MUL16(are, bim) + MUL16(bre, aim));\ +} + +FFTComplex *exptab; + +void fft_ref_init(int nbits, int inverse) +{ + int n, i; + float c1, s1, alpha; + + n = 1 << nbits; + exptab = av_malloc((n / 2) * sizeof(FFTComplex)); + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + c1 = cos(alpha); + s1 = sin(alpha); + if (!inverse) + s1 = -s1; + exptab[i].re = c1; + exptab[i].im = s1; + } +} + +void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits) +{ + int n, i, j, k, n2; + float tmp_re, tmp_im, s, c; + FFTComplex *q; + + n = 1 << nbits; + n2 = n >> 1; + for(i=0;i= n2) { + c = -exptab[k - n2].re; + s = -exptab[k - n2].im; + } else { + c = exptab[k].re; + s = exptab[k].im; + } + CMAC(tmp_re, tmp_im, c, s, q->re, q->im); + q++; + } + tabr[i].re = tmp_re; + tabr[i].im = tmp_im; + } +} + +void imdct_ref(float *out, float *in, int n) +{ + int k, i, a; + float sum, f; + + for(i=0;i= 1e-3) { + av_log(NULL, AV_LOG_ERROR, "ERROR %d: %f %f\n", + i, tab1[i], tab2[i]); + } + } +} + + +void help(void) +{ + av_log(NULL, AV_LOG_INFO,"usage: fft-test [-h] [-s] [-i] [-n b]\n" + "-h print this help\n" + "-s speed test\n" + "-m (I)MDCT test\n" + "-i inverse transform test\n" + "-n b set the transform size to 2^b\n" + ); + exit(1); +} + + + +int main(int argc, char **argv) +{ + FFTComplex *tab, *tab1, *tab_ref; + FFTSample *tabtmp, *tab2; + int it, i, c; + int do_speed = 0; + int do_mdct = 0; + int do_inverse = 0; + FFTContext s1, *s = &s1; + MDCTContext m1, *m = &m1; + int fft_nbits, fft_size; + + mm_flags = 0; + fft_nbits = 9; + for(;;) { + c = getopt(argc, argv, "hsimn:"); + if (c == -1) + break; + switch(c) { + case 'h': + help(); + break; + case 's': + do_speed = 1; + break; + case 'i': + do_inverse = 1; + break; + case 'm': + 
do_mdct = 1; + break; + case 'n': + fft_nbits = atoi(optarg); + break; + } + } + + fft_size = 1 << fft_nbits; + tab = av_malloc(fft_size * sizeof(FFTComplex)); + tab1 = av_malloc(fft_size * sizeof(FFTComplex)); + tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); + tabtmp = av_malloc(fft_size / 2 * sizeof(FFTSample)); + tab2 = av_malloc(fft_size * sizeof(FFTSample)); + + if (do_mdct) { + if (do_inverse) + av_log(NULL, AV_LOG_INFO,"IMDCT"); + else + av_log(NULL, AV_LOG_INFO,"MDCT"); + ff_mdct_init(m, fft_nbits, do_inverse); + } else { + if (do_inverse) + av_log(NULL, AV_LOG_INFO,"IFFT"); + else + av_log(NULL, AV_LOG_INFO,"FFT"); + ff_fft_init(s, fft_nbits, do_inverse); + fft_ref_init(fft_nbits, do_inverse); + } + av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); + + /* generate random data */ + + for(i=0;i= 1000000) + break; + nb_its *= 2; + } + av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n", + (double)duration / nb_its, + (double)duration / 1000000.0, + nb_its); + } + + if (do_mdct) { + ff_mdct_end(m); + } else { + ff_fft_end(s); + } + return 0; +} diff --git a/mpeg4/src/libavcodec/fft.c b/mpeg4/src/libavcodec/fft.c new file mode 100644 index 0000000000000000000000000000000000000000..1306abd69b82ae6c7519a4518e4ae5548ac8e511 --- /dev/null +++ b/mpeg4/src/libavcodec/fft.c @@ -0,0 +1,266 @@ +/* + * FFT/IFFT transforms + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file fft.c + * FFT/IFFT transforms. + */ + +#include "dsputil.h" + +/** + * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is + * done + */ +int ff_fft_init(FFTContext *s, int nbits, int inverse) +{ + int i, j, m, n; + float alpha, c1, s1, s2; + + s->nbits = nbits; + n = 1 << nbits; + + s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); + if (!s->exptab) + goto fail; + s->revtab = av_malloc(n * sizeof(uint16_t)); + if (!s->revtab) + goto fail; + s->inverse = inverse; + + s2 = inverse ? 1.0 : -1.0; + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + c1 = cos(alpha); + s1 = sin(alpha) * s2; + s->exptab[i].re = c1; + s->exptab[i].im = s1; + } + s->fft_calc = ff_fft_calc_c; + s->exptab1 = NULL; + + /* compute constant table for HAVE_SSE version */ +#if (defined(HAVE_MMX) && (defined(HAVE_BUILTIN_VECTOR) || defined(HAVE_MM3DNOW))) || defined(HAVE_ALTIVEC) + { + int has_vectors = 0; + +#if defined(HAVE_MMX) + has_vectors = mm_support() & (MM_3DNOW | MM_3DNOWEXT | MM_SSE | MM_SSE2); +#endif +#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE) + has_vectors = mm_support() & MM_ALTIVEC; +#endif + if (has_vectors) { + int np, nblocks, np2, l; + FFTComplex *q; + + np = 1 << nbits; + nblocks = np >> 3; + np2 = np >> 1; + s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); + if (!s->exptab1) + goto fail; + q = s->exptab1; + do { + for(l = 0; l < np2; l += 2 * nblocks) { + *q++ = s->exptab[l]; + *q++ = s->exptab[l + nblocks]; + + q->re = -s->exptab[l].im; + q->im = s->exptab[l].re; + q++; + q->re = -s->exptab[l + nblocks].im; + q->im = s->exptab[l + nblocks].re; + q++; + } + nblocks = nblocks >> 1; + } while (nblocks != 0); + av_freep(&s->exptab); +#if defined(HAVE_MMX) +#ifdef 
HAVE_MM3DNOW + if (has_vectors & MM_3DNOWEXT) + /* 3DNowEx for Athlon(XP) */ + s->fft_calc = ff_fft_calc_3dn2; + else if (has_vectors & MM_3DNOW) + /* 3DNow! for K6-2/3 */ + s->fft_calc = ff_fft_calc_3dn; +#endif +#ifdef HAVE_BUILTIN_VECTOR + if (has_vectors & MM_SSE2) + /* SSE for P4/K8 */ + s->fft_calc = ff_fft_calc_sse; + else if ((has_vectors & MM_SSE) && + s->fft_calc == ff_fft_calc_c) + /* SSE for P3 */ + s->fft_calc = ff_fft_calc_sse; +#endif +#else /* HAVE_MMX */ + s->fft_calc = ff_fft_calc_altivec; +#endif + } + } +#endif + + /* compute bit reverse table */ + + for(i=0;i> j) & 1) << (nbits-j-1); + } + s->revtab[i]=m; + } + return 0; + fail: + av_freep(&s->revtab); + av_freep(&s->exptab); + av_freep(&s->exptab1); + return -1; +} + +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + FFTSample ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax);\ + pim = (by + ay);\ + qre = (bx - ax);\ + qim = (by - ay);\ +} + +#define MUL16(a,b) ((a) * (b)) + +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim));\ + pim = (MUL16(are, bim) + MUL16(bre, aim));\ +} + +/** + * Do a complex FFT with the parameters defined in ff_fft_init(). The + * input data must be permuted before with s->revtab table. No + * 1.0/sqrt(n) normalization is done. 
+ */ +void ff_fft_calc_c(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *exptab = s->exptab; + int l; + FFTSample tmp_re, tmp_im; + + np = 1 << ln; + + /* pass 0 */ + + p=&z[0]; + j=(np >> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + + p=&z[0]; + j=np >> 2; + if (s->inverse) { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, -p[3].im, p[3].re); + p+=4; + } while (--j != 0); + } else { + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + } + /* pass 2 .. ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +/** + * Do the permutation needed BEFORE calling ff_fft_calc() + */ +void ff_fft_permute(FFTContext *s, FFTComplex *z) +{ + int j, k, np; + FFTComplex tmp; + const uint16_t *revtab = s->revtab; + + /* reverse */ + np = 1 << s->nbits; + for(j=0;jrevtab); + av_freep(&s->exptab); + av_freep(&s->exptab1); +} + diff --git a/mpeg4/src/libavcodec/ffv1.c b/mpeg4/src/libavcodec/ffv1.c new file mode 100644 index 0000000000000000000000000000000000000000..36a85d9a42023d97ee5f5fdb13f0f209df48e494 --- /dev/null +++ b/mpeg4/src/libavcodec/ffv1.c @@ -0,0 +1,1037 @@ +/* + * FFV1 codec for 
libavcodec + * + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file ffv1.c + * FF Video Codec 1 (an experimental lossless codec) + */ + +#include "common.h" +#include "bitstream.h" +#include "avcodec.h" +#include "dsputil.h" +#include "rangecoder.h" +#include "golomb.h" + +#define MAX_PLANES 4 +#define CONTEXT_SIZE 32 + +static const int8_t quant3[256]={ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, +}; +static const int8_t quant5[256]={ + 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, +}; +static const int8_t quant7[256]={ + 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, +}; +static const int8_t quant9[256]={ + 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, +}; +static const int8_t quant11[256]={ + 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, +}; +static const int8_t quant13[256]={ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, 
+-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, +}; + +static const uint8_t log2_run[32]={ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, + 8, 9,10,11,12,13,14,15, +}; + +typedef struct VlcState{ + int16_t drift; + uint16_t error_sum; + int8_t bias; + uint8_t count; +} VlcState; + +typedef struct PlaneContext{ + int context_count; + uint8_t (*state)[CONTEXT_SIZE]; + VlcState *vlc_state; + uint8_t interlace_bit_state[2]; +} PlaneContext; + +typedef struct FFV1Context{ + AVCodecContext *avctx; + RangeCoder c; + GetBitContext gb; + PutBitContext pb; + int version; + int width, height; + int chroma_h_shift, chroma_v_shift; + int flags; + int picture_number; + AVFrame picture; + int plane_count; + int ac; ///< 1-> CABAC 0-> golomb rice + PlaneContext plane[MAX_PLANES]; + int16_t quant_table[5][256]; + int run_index; + int colorspace; + + DSPContext dsp; +}FFV1Context; + +static always_inline int fold(int diff, int bits){ + if(bits==8) + diff= (int8_t)diff; + else{ + diff+= 1<<(bits-1); + diff&=(1<quant_table[3][127]){ + const int TT= last2[0]; + const int LL= src[-2]; + return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF] + +f->quant_table[3][(LL-L) & 0xFF] + f->quant_table[4][(TT-T) & 0xFF]; + }else + return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF]; +} + +static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ + int i; + + if(v){ + const int a= ABS(v); + const int e= av_log2(a); + put_rac(c, state+0, 0); + + assert(e<=9); + + for(i=0; i=0; i--){ + put_rac(c, state+22+i, (a>>i)&1); //22..31 + } + + if(is_signed) + put_rac(c, state+11 + e, v < 0); //11..21 + }else{ + put_rac(c, state+0, 1); + } +} + +static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ + if(get_rac(c, 
state+0)) + return 0; + else{ + int i, e, a; + e= 0; + while(get_rac(c, state+1 + e)){ //1..10 + e++; + } + assert(e<=9); + + a= 1; + for(i=e-1; i>=0; i--){ + a += a + get_rac(c, state+22 + i); //22..31 + } + + if(is_signed && get_rac(c, state+11 + e)) //11..21 + return -a; + else + return a; + } +} + +static inline void update_vlc_state(VlcState * const state, const int v){ + int drift= state->drift; + int count= state->count; + state->error_sum += ABS(v); + drift += v; + + if(count == 128){ //FIXME variable + count >>= 1; + drift >>= 1; + state->error_sum >>= 1; + } + count++; + + if(drift <= -count){ + if(state->bias > -128) state->bias--; + + drift += count; + if(drift <= -count) + drift= -count + 1; + }else if(drift > 0){ + if(state->bias < 127) state->bias++; + + drift -= count; + if(drift > 0) + drift= 0; + } + + state->drift= drift; + state->count= count; +} + +static inline void put_vlc_symbol(PutBitContext *pb, VlcState * const state, int v, int bits){ + int i, k, code; +//printf("final: %d ", v); + v = fold(v - state->bias, bits); + + i= state->count; + k=0; + while(i < state->error_sum){ //FIXME optimize + k++; + i += i; + } + + assert(k<=8); + +#if 0 // JPEG LS + if(k==0 && 2*state->drift <= - state->count) code= v ^ (-1); + else code= v; +#else + code= v ^ ((2*state->drift + state->count)>>31); +#endif + +//printf("v:%d/%d bias:%d error:%d drift:%d count:%d k:%d\n", v, code, state->bias, state->error_sum, state->drift, state->count, k); + set_sr_golomb(pb, code, k, 12, bits); + + update_vlc_state(state, v); +} + +static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int bits){ + int k, i, v, ret; + + i= state->count; + k=0; + while(i < state->error_sum){ //FIXME optimize + k++; + i += i; + } + + assert(k<=8); + + v= get_sr_golomb(gb, k, 12, bits); +//printf("v:%d bias:%d error:%d drift:%d count:%d k:%d", v, state->bias, state->error_sum, state->drift, state->count, k); + +#if 0 // JPEG LS + if(k==0 && 2*state->drift <= - 
state->count) v ^= (-1); +#else + v ^= ((2*state->drift + state->count)>>31); +#endif + + ret= fold(v + state->bias, bits); + + update_vlc_state(state, v); +//printf("final: %d\n", ret); + return ret; +} + +static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ + PlaneContext * const p= &s->plane[plane_index]; + RangeCoder * const c= &s->c; + int x; + int run_index= s->run_index; + int run_count=0; + int run_mode=0; + + if(s->ac){ + if(c->bytestream_end - c->bytestream < w*20){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + }else{ + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < w*4){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + } + + for(x=0; xac){ + put_symbol(c, p->state[context], diff, 1); + }else{ + if(context == 0) run_mode=1; + + if(run_mode){ + + if(diff){ + while(run_count >= 1<pb, 1, 1); + } + + put_bits(&s->pb, 1 + log2_run[run_index], run_count); + if(run_index) run_index--; + run_count=0; + run_mode=0; + if(diff>0) diff--; + }else{ + run_count++; + } + } + +// printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, (int)put_bits_count(&s->pb)); + + if(run_mode == 0) + put_vlc_symbol(&s->pb, &p->vlc_state[context], diff, bits); + } + } + if(run_mode){ + while(run_count >= 1<pb, 1, 1); + } + + if(run_count) + put_bits(&s->pb, 1, 1); + } + s->run_index= run_index; + + return 0; +} + +static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){ + int x,y,i; + const int ring_size= s->avctx->context_model ? 3 : 2; + int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size]; + s->run_index=0; + + memset(sample_buffer, 0, sizeof(sample_buffer)); + + for(y=0; yavctx->context_model ? 
3 : 2; + int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size]; + s->run_index=0; + + memset(sample_buffer, 0, sizeof(sample_buffer)); + + for(y=0; y>8)&0xFF; + int r= (v>>16)&0xFF; + + b -= g; + r -= g; + g += (b + r)>>2; + b += 0x100; + r += 0x100; + +// assert(g>=0 && b>=0 && r>=0); +// assert(g<256 && b<512 && r<512); + sample[0][0][x]= g; + sample[1][0][x]= b; + sample[2][0][x]= r; + } + for(p=0; p<3; p++){ + sample[p][0][-1]= sample[p][1][0 ]; + sample[p][1][ w]= sample[p][1][w-1]; + encode_line(s, w, sample[p], FFMIN(p, 1), 9); + } + } +} + +static void write_quant_table(RangeCoder *c, int16_t *quant_table){ + int last=0; + int i; + uint8_t state[CONTEXT_SIZE]; + memset(state, 128, sizeof(state)); + + for(i=1; i<128 ; i++){ + if(quant_table[i] != quant_table[i-1]){ + put_symbol(c, state, i-last-1, 0); + last= i; + } + } + put_symbol(c, state, i-last-1, 0); +} + +static void write_header(FFV1Context *f){ + uint8_t state[CONTEXT_SIZE]; + int i; + RangeCoder * const c= &f->c; + + memset(state, 128, sizeof(state)); + + put_symbol(c, state, f->version, 0); + put_symbol(c, state, f->avctx->coder_type, 0); + put_symbol(c, state, f->colorspace, 0); //YUV cs type + put_rac(c, state, 1); //chroma planes + put_symbol(c, state, f->chroma_h_shift, 0); + put_symbol(c, state, f->chroma_v_shift, 0); + put_rac(c, state, 0); //no transparency plane + + for(i=0; i<5; i++) + write_quant_table(c, f->quant_table[i]); +} + +static int common_init(AVCodecContext *avctx){ + FFV1Context *s = avctx->priv_data; + int width, height; + + s->avctx= avctx; + s->flags= avctx->flags; + + dsputil_init(&s->dsp, avctx); + + width= s->width= avctx->width; + height= s->height= avctx->height; + + assert(width && height); + + return 0; +} + +static int encode_init(AVCodecContext *avctx) +{ + FFV1Context *s = avctx->priv_data; + int i; + + common_init(avctx); + + s->version=0; + s->ac= avctx->coder_type; + + s->plane_count=2; + for(i=0; i<256; i++){ + s->quant_table[0][i]= quant11[i]; 
+ s->quant_table[1][i]= 11*quant11[i]; + if(avctx->context_model==0){ + s->quant_table[2][i]= 11*11*quant11[i]; + s->quant_table[3][i]= + s->quant_table[4][i]=0; + }else{ + s->quant_table[2][i]= 11*11*quant5 [i]; + s->quant_table[3][i]= 5*11*11*quant5 [i]; + s->quant_table[4][i]= 5*5*11*11*quant5 [i]; + } + } + + for(i=0; iplane_count; i++){ + PlaneContext * const p= &s->plane[i]; + + if(avctx->context_model==0){ + p->context_count= (11*11*11+1)/2; + }else{ + p->context_count= (11*11*5*5*5+1)/2; + } + + if(s->ac){ + if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t)); + }else{ + if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState)); + } + } + + avctx->coded_frame= &s->picture; + switch(avctx->pix_fmt){ + case PIX_FMT_YUV444P: + case PIX_FMT_YUV422P: + case PIX_FMT_YUV420P: + case PIX_FMT_YUV411P: + case PIX_FMT_YUV410P: + s->colorspace= 0; + break; + case PIX_FMT_RGBA32: + s->colorspace= 1; + break; + default: + av_log(avctx, AV_LOG_ERROR, "format not supported\n"); + return -1; + } + avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); + + s->picture_number=0; + + return 0; +} + + +static void clear_state(FFV1Context *f){ + int i, j; + + for(i=0; iplane_count; i++){ + PlaneContext *p= &f->plane[i]; + + p->interlace_bit_state[0]= 128; + p->interlace_bit_state[1]= 128; + + for(j=0; jcontext_count; j++){ + if(f->ac){ + memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE); + }else{ + p->vlc_state[j].drift= 0; + p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2); + p->vlc_state[j].bias= 0; + p->vlc_state[j].count= 1; + } + } + } +} + +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + FFV1Context *f = avctx->priv_data; + RangeCoder * const c= &f->c; + AVFrame *pict = data; + const int width= f->width; + const int height= f->height; + AVFrame * const p= &f->picture; + int used_count= 0; + uint8_t keystate=128; + + 
ff_init_range_encoder(c, buf, buf_size); +// ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + + *p = *pict; + p->pict_type= FF_I_TYPE; + + if(avctx->gop_size==0 || f->picture_number % avctx->gop_size == 0){ + put_rac(c, &keystate, 1); + p->key_frame= 1; + write_header(f); + clear_state(f); + }else{ + put_rac(c, &keystate, 0); + p->key_frame= 0; + } + + if(!f->ac){ + used_count += ff_rac_terminate(c); +//printf("pos=%d\n", used_count); + init_put_bits(&f->pb, buf + used_count, buf_size - used_count); + } + + if(f->colorspace==0){ + const int chroma_width = -((-width )>>f->chroma_h_shift); + const int chroma_height= -((-height)>>f->chroma_v_shift); + + encode_plane(f, p->data[0], width, height, p->linesize[0], 0); + + encode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1); + encode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1); + }else{ + encode_rgb_frame(f, (uint32_t*)(p->data[0]), width, height, p->linesize[0]/4); + } + emms_c(); + + f->picture_number++; + + if(f->ac){ + return ff_rac_terminate(c); + }else{ + flush_put_bits(&f->pb); //nicer padding FIXME + return used_count + (put_bits_count(&f->pb)+7)/8; + } +} + +static void common_end(FFV1Context *s){ + int i; + + for(i=0; iplane_count; i++){ + PlaneContext *p= &s->plane[i]; + + av_freep(&p->state); + } +} + +static int encode_end(AVCodecContext *avctx) +{ + FFV1Context *s = avctx->priv_data; + + common_end(s); + + return 0; +} + +static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ + PlaneContext * const p= &s->plane[plane_index]; + RangeCoder * const c= &s->c; + int x; + int run_count=0; + int run_mode=0; + int run_index= s->run_index; + + for(x=0; xac){ + diff= get_symbol(c, p->state[context], 1); + }else{ + if(context == 0 && run_mode==0) run_mode=1; + + if(run_mode){ + if(run_count==0 && run_mode==1){ + 
if(get_bits1(&s->gb)){ + run_count = 1<gb, log2_run[run_index]); + else run_count=0; + if(run_index) run_index--; + run_mode=2; + } + } + run_count--; + if(run_count < 0){ + run_mode=0; + run_count=0; + diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits); + if(diff>=0) diff++; + }else + diff=0; + }else + diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits); + +// printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, get_bits_count(&s->gb)); + } + + if(sign) diff= -diff; + + sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<run_index= run_index; +} + +static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){ + int x, y; + int_fast16_t sample_buffer[2][w+6]; + int_fast16_t *sample[2]= {sample_buffer[0]+3, sample_buffer[1]+3}; + + s->run_index=0; + + memset(sample_buffer, 0, sizeof(sample_buffer)); + + for(y=0; yrun_index=0; + + memset(sample_buffer, 0, sizeof(sample_buffer)); + + for(y=0; y=0 && b>=0 && r>=0); +// assert(g<256 && b<512 && r<512); + + b -= 0x100; + r -= 0x100; + g -= (b + r)>>2; + b += g; + r += g; + + src[x + stride*y]= b + (g<<8) + (r<<16); + } + } +} + +static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){ + int v; + int i=0; + uint8_t state[CONTEXT_SIZE]; + + memset(state, 128, sizeof(state)); + + for(v=0; i<128 ; v++){ + int len= get_symbol(c, state, 0) + 1; + + if(len + i > 128) return -1; + + while(len--){ + quant_table[i] = scale*v; + i++; +//printf("%2d ",v); +//if(i%16==0) printf("\n"); + } + } + + for(i=1; i<128; i++){ + quant_table[256-i]= -quant_table[i]; + } + quant_table[128]= -quant_table[127]; + + return 2*v - 1; +} + +static int read_header(FFV1Context *f){ + uint8_t state[CONTEXT_SIZE]; + int i, context_count; + RangeCoder * const c= &f->c; + + memset(state, 128, sizeof(state)); + + f->version= get_symbol(c, state, 0); + f->ac= f->avctx->coder_type= get_symbol(c, state, 0); + f->colorspace= 
get_symbol(c, state, 0); //YUV cs type + get_rac(c, state); //no chroma = false + f->chroma_h_shift= get_symbol(c, state, 0); + f->chroma_v_shift= get_symbol(c, state, 0); + get_rac(c, state); //transparency plane + f->plane_count= 2; + + if(f->colorspace==0){ + switch(16*f->chroma_h_shift + f->chroma_v_shift){ + case 0x00: f->avctx->pix_fmt= PIX_FMT_YUV444P; break; + case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break; + case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break; + case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break; + case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break; + default: + av_log(f->avctx, AV_LOG_ERROR, "format not supported\n"); + return -1; + } + }else if(f->colorspace==1){ + if(f->chroma_h_shift || f->chroma_v_shift){ + av_log(f->avctx, AV_LOG_ERROR, "chroma subsampling not supported in this colorspace\n"); + return -1; + } + f->avctx->pix_fmt= PIX_FMT_RGBA32; + }else{ + av_log(f->avctx, AV_LOG_ERROR, "colorspace not supported\n"); + return -1; + } + +//printf("%d %d %d\n", f->chroma_h_shift, f->chroma_v_shift,f->avctx->pix_fmt); + + context_count=1; + for(i=0; i<5; i++){ + context_count*= read_quant_table(c, f->quant_table[i], context_count); + if(context_count < 0 || context_count > 32768){ + av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n"); + return -1; + } + } + context_count= (context_count+1)/2; + + for(i=0; iplane_count; i++){ + PlaneContext * const p= &f->plane[i]; + + p->context_count= context_count; + + if(f->ac){ + if(!p->state) p->state= av_malloc(CONTEXT_SIZE*p->context_count*sizeof(uint8_t)); + }else{ + if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState)); + } + } + + return 0; +} + +static int decode_init(AVCodecContext *avctx) +{ +// FFV1Context *s = avctx->priv_data; + + common_init(avctx); + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ + FFV1Context *f = avctx->priv_data; + RangeCoder * const c= &f->c; + const 
int width= f->width; + const int height= f->height; + AVFrame * const p= &f->picture; + int bytes_read; + uint8_t keystate= 128; + + AVFrame *picture = data; + + ff_init_range_decoder(c, buf, buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + + + p->pict_type= FF_I_TYPE; //FIXME I vs. P + if(get_rac(c, &keystate)){ + p->key_frame= 1; + if(read_header(f) < 0) + return -1; + clear_state(f); + }else{ + p->key_frame= 0; + } + if(!f->plane[0].state && !f->plane[0].vlc_state) + return -1; + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + if(avctx->debug&FF_DEBUG_PICT_INFO) + av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac); + + if(!f->ac){ + bytes_read = c->bytestream - c->bytestream_start - 1; + if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME +//printf("pos=%d\n", bytes_read); + init_get_bits(&f->gb, buf + bytes_read, buf_size - bytes_read); + } else { + bytes_read = 0; /* avoid warning */ + } + + if(f->colorspace==0){ + const int chroma_width = -((-width )>>f->chroma_h_shift); + const int chroma_height= -((-height)>>f->chroma_v_shift); + decode_plane(f, p->data[0], width, height, p->linesize[0], 0); + + decode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1); + decode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1); + }else{ + decode_rgb_frame(f, (uint32_t*)p->data[0], width, height, p->linesize[0]/4); + } + + emms_c(); + + f->picture_number++; + + *picture= *p; + + avctx->release_buffer(avctx, p); //FIXME + + *data_size = sizeof(AVFrame); + + if(f->ac){ + bytes_read= c->bytestream - c->bytestream_start - 1; + if(bytes_read ==0) av_log(f->avctx, AV_LOG_ERROR, "error at end of frame\n"); + }else{ + bytes_read+= (get_bits_count(&f->gb)+7)/8; + } + + return bytes_read; +} + +AVCodec ffv1_decoder = { + "ffv1", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV1, + sizeof(FFV1Context), + 
decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/, + NULL +}; + +#ifdef CONFIG_ENCODERS +AVCodec ffv1_encoder = { + "ffv1", + CODEC_TYPE_VIDEO, + CODEC_ID_FFV1, + sizeof(FFV1Context), + encode_init, + encode_frame, + encode_end, +}; +#endif diff --git a/mpeg4/src/libavcodec/flac.c b/mpeg4/src/libavcodec/flac.c new file mode 100644 index 0000000000000000000000000000000000000000..8710e21d3d96b3b44bf66487752934ba5bd16b7d --- /dev/null +++ b/mpeg4/src/libavcodec/flac.c @@ -0,0 +1,779 @@ +/* + * FLAC (Free Lossless Audio Codec) decoder + * Copyright (c) 2003 Alex Beregszaszi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file flac.c + * FLAC (Free Lossless Audio Codec) decoder + * @author Alex Beregszaszi + * + * For more information on the FLAC format, visit: + * http://flac.sourceforge.net/ + * + * This decoder can be used in 1 of 2 ways: Either raw FLAC data can be fed + * through, starting from the initial 'fLaC' signature; or by passing the + * 34-byte streaminfo structure through avctx->extradata[_size] followed + * by data starting with the 0xFFF8 marker. 
+ */ + +#include + +#include "avcodec.h" +#include "bitstream.h" +#include "golomb.h" +#include "crc.h" + +#undef NDEBUG +#include + +#define MAX_CHANNELS 8 +#define MAX_BLOCKSIZE 65535 +#define FLAC_STREAMINFO_SIZE 34 + +enum decorrelation_type { + INDEPENDENT, + LEFT_SIDE, + RIGHT_SIDE, + MID_SIDE, +}; + +typedef struct FLACContext { + AVCodecContext *avctx; + GetBitContext gb; + + int min_blocksize, max_blocksize; + int min_framesize, max_framesize; + int samplerate, channels; + int blocksize/*, last_blocksize*/; + int bps, curr_bps; + enum decorrelation_type decorrelation; + + int32_t *decoded[MAX_CHANNELS]; + uint8_t *bitstream; + int bitstream_size; + int bitstream_index; + unsigned int allocated_bitstream_size; +} FLACContext; + +#define METADATA_TYPE_STREAMINFO 0 + +static int sample_rate_table[] = +{ 0, 0, 0, 0, + 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000, + 0, 0, 0, 0 }; + +static int sample_size_table[] = +{ 0, 8, 12, 0, 16, 20, 24, 0 }; + +static int blocksize_table[] = { + 0, 192, 576<<0, 576<<1, 576<<2, 576<<3, 0, 0, +256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7 +}; + +static int64_t get_utf8(GetBitContext *gb) +{ + uint64_t val; + int ones=0, bytes; + + while(get_bits1(gb)) + ones++; + + if (ones==0) bytes=0; + else if(ones==1) return -1; + else bytes= ones - 1; + + val= get_bits(gb, 7-ones); + while(bytes--){ + const int tmp = get_bits(gb, 8); + + if((tmp>>6) != 2) + return -1; + val<<=6; + val|= tmp&0x3F; + } + return val; +} + +#if 0 +static int skip_utf8(GetBitContext *gb) +{ + int ones=0, bytes; + + while(get_bits1(gb)) + ones++; + + if (ones==0) bytes=0; + else if(ones==1) return -1; + else bytes= ones - 1; + + skip_bits(gb, 7-ones); + while(bytes--){ + const int tmp = get_bits(gb, 8); + + if((tmp>>6) != 2) + return -1; + } + return 0; +} +#endif + +static void metadata_streaminfo(FLACContext *s); +static void dump_headers(FLACContext *s); + +static int flac_decode_init(AVCodecContext * avctx) +{ + FLACContext *s = 
avctx->priv_data; + s->avctx = avctx; + + /* initialize based on the demuxer-supplied streamdata header */ + if (avctx->extradata_size == FLAC_STREAMINFO_SIZE) { + init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8); + metadata_streaminfo(s); + dump_headers(s); + } + + return 0; +} + +static void dump_headers(FLACContext *s) +{ + av_log(s->avctx, AV_LOG_DEBUG, " Blocksize: %d .. %d (%d)\n", s->min_blocksize, s->max_blocksize, s->blocksize); + av_log(s->avctx, AV_LOG_DEBUG, " Framesize: %d .. %d\n", s->min_framesize, s->max_framesize); + av_log(s->avctx, AV_LOG_DEBUG, " Samplerate: %d\n", s->samplerate); + av_log(s->avctx, AV_LOG_DEBUG, " Channels: %d\n", s->channels); + av_log(s->avctx, AV_LOG_DEBUG, " Bits: %d\n", s->bps); +} + +static void allocate_buffers(FLACContext *s){ + int i; + + assert(s->max_blocksize); + + if(s->max_framesize == 0 && s->max_blocksize){ + s->max_framesize= (s->channels * s->bps * s->max_blocksize + 7)/ 8; //FIXME header overhead + } + + for (i = 0; i < s->channels; i++) + { + s->decoded[i] = av_realloc(s->decoded[i], sizeof(int32_t)*s->max_blocksize); + } + + s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize); +} + +static void metadata_streaminfo(FLACContext *s) +{ + /* mandatory streaminfo */ + s->min_blocksize = get_bits(&s->gb, 16); + s->max_blocksize = get_bits(&s->gb, 16); + + s->min_framesize = get_bits_long(&s->gb, 24); + s->max_framesize = get_bits_long(&s->gb, 24); + + s->samplerate = get_bits_long(&s->gb, 20); + s->channels = get_bits(&s->gb, 3) + 1; + s->bps = get_bits(&s->gb, 5) + 1; + + s->avctx->channels = s->channels; + s->avctx->sample_rate = s->samplerate; + + skip_bits(&s->gb, 36); /* total num of samples */ + + skip_bits(&s->gb, 64); /* md5 sum */ + skip_bits(&s->gb, 64); /* md5 sum */ + + allocate_buffers(s); +} + +static int decode_residuals(FLACContext *s, int channel, int pred_order) +{ + int i, tmp, partition, method_type, rice_order; + int sample = 0, samples; 
+ + method_type = get_bits(&s->gb, 2); + if (method_type != 0){ + av_log(s->avctx, AV_LOG_DEBUG, "illegal residual coding method %d\n", method_type); + return -1; + } + + rice_order = get_bits(&s->gb, 4); + + samples= s->blocksize >> rice_order; + + sample= + i= pred_order; + for (partition = 0; partition < (1 << rice_order); partition++) + { + tmp = get_bits(&s->gb, 4); + if (tmp == 15) + { + av_log(s->avctx, AV_LOG_DEBUG, "fixed len partition\n"); + tmp = get_bits(&s->gb, 5); + for (; i < samples; i++, sample++) + s->decoded[channel][sample] = get_sbits(&s->gb, tmp); + } + else + { +// av_log(s->avctx, AV_LOG_DEBUG, "rice coded partition k=%d\n", tmp); + for (; i < samples; i++, sample++){ + s->decoded[channel][sample] = get_sr_golomb_flac(&s->gb, tmp, INT_MAX, 0); + } + } + i= 0; + } + +// av_log(s->avctx, AV_LOG_DEBUG, "partitions: %d, samples: %d\n", 1 << rice_order, sample); + + return 0; +} + +static int decode_subframe_fixed(FLACContext *s, int channel, int pred_order) +{ + int i; + +// av_log(s->avctx, AV_LOG_DEBUG, " SUBFRAME FIXED\n"); + + /* warm up samples */ +// av_log(s->avctx, AV_LOG_DEBUG, " warm up samples: %d\n", pred_order); + + for (i = 0; i < pred_order; i++) + { + s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps); +// av_log(s->avctx, AV_LOG_DEBUG, " %d: %d\n", i, s->decoded[channel][i]); + } + + if (decode_residuals(s, channel, pred_order) < 0) + return -1; + + switch(pred_order) + { + case 0: + break; + case 1: + for (i = pred_order; i < s->blocksize; i++) + s->decoded[channel][i] += s->decoded[channel][i-1]; + break; + case 2: + for (i = pred_order; i < s->blocksize; i++) + s->decoded[channel][i] += 2*s->decoded[channel][i-1] + - s->decoded[channel][i-2]; + break; + case 3: + for (i = pred_order; i < s->blocksize; i++) + s->decoded[channel][i] += 3*s->decoded[channel][i-1] + - 3*s->decoded[channel][i-2] + + s->decoded[channel][i-3]; + break; + case 4: + for (i = pred_order; i < s->blocksize; i++) + s->decoded[channel][i] += 
4*s->decoded[channel][i-1] + - 6*s->decoded[channel][i-2] + + 4*s->decoded[channel][i-3] + - s->decoded[channel][i-4]; + break; + default: + av_log(s->avctx, AV_LOG_ERROR, "illegal pred order %d\n", pred_order); + return -1; + } + + return 0; +} + +static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order) +{ + int sum, i, j; + int coeff_prec, qlevel; + int coeffs[pred_order]; + +// av_log(s->avctx, AV_LOG_DEBUG, " SUBFRAME LPC\n"); + + /* warm up samples */ +// av_log(s->avctx, AV_LOG_DEBUG, " warm up samples: %d\n", pred_order); + + for (i = 0; i < pred_order; i++) + { + s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps); +// av_log(s->avctx, AV_LOG_DEBUG, " %d: %d\n", i, s->decoded[channel][i]); + } + + coeff_prec = get_bits(&s->gb, 4) + 1; + if (coeff_prec == 16) + { + av_log(s->avctx, AV_LOG_DEBUG, "invalid coeff precision\n"); + return -1; + } +// av_log(s->avctx, AV_LOG_DEBUG, " qlp coeff prec: %d\n", coeff_prec); + qlevel = get_sbits(&s->gb, 5); +// av_log(s->avctx, AV_LOG_DEBUG, " quant level: %d\n", qlevel); + if(qlevel < 0){ + av_log(s->avctx, AV_LOG_DEBUG, "qlevel %d not supported, maybe buggy stream\n", qlevel); + return -1; + } + + for (i = 0; i < pred_order; i++) + { + coeffs[i] = get_sbits(&s->gb, coeff_prec); +// av_log(s->avctx, AV_LOG_DEBUG, " %d: %d\n", i, coeffs[i]); + } + + if (decode_residuals(s, channel, pred_order) < 0) + return -1; + + for (i = pred_order; i < s->blocksize; i++) + { + sum = 0; + for (j = 0; j < pred_order; j++) + sum += coeffs[j] * s->decoded[channel][i-j-1]; + s->decoded[channel][i] += sum >> qlevel; + } + + return 0; +} + +static inline int decode_subframe(FLACContext *s, int channel) +{ + int type, wasted = 0; + int i, tmp; + + s->curr_bps = s->bps; + if(channel == 0){ + if(s->decorrelation == RIGHT_SIDE) + s->curr_bps++; + }else{ + if(s->decorrelation == LEFT_SIDE || s->decorrelation == MID_SIDE) + s->curr_bps++; + } + + if (get_bits1(&s->gb)) + { + av_log(s->avctx, AV_LOG_ERROR, "invalid subframe 
padding\n"); + return -1; + } + type = get_bits(&s->gb, 6); +// wasted = get_bits1(&s->gb); + +// if (wasted) +// { +// while (!get_bits1(&s->gb)) +// wasted++; +// if (wasted) +// wasted++; +// s->curr_bps -= wasted; +// } +#if 0 + wasted= 16 - av_log2(show_bits(&s->gb, 17)); + skip_bits(&s->gb, wasted+1); + s->curr_bps -= wasted; +#else + if (get_bits1(&s->gb)) + { + wasted = 1; + while (!get_bits1(&s->gb)) + wasted++; + s->curr_bps -= wasted; + av_log(s->avctx, AV_LOG_DEBUG, "%d wasted bits\n", wasted); + } +#endif +//FIXME use av_log2 for types + if (type == 0) + { + av_log(s->avctx, AV_LOG_DEBUG, "coding type: constant\n"); + tmp = get_sbits(&s->gb, s->curr_bps); + for (i = 0; i < s->blocksize; i++) + s->decoded[channel][i] = tmp; + } + else if (type == 1) + { + av_log(s->avctx, AV_LOG_DEBUG, "coding type: verbatim\n"); + for (i = 0; i < s->blocksize; i++) + s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps); + } + else if ((type >= 8) && (type <= 12)) + { +// av_log(s->avctx, AV_LOG_DEBUG, "coding type: fixed\n"); + if (decode_subframe_fixed(s, channel, type & ~0x8) < 0) + return -1; + } + else if (type >= 32) + { +// av_log(s->avctx, AV_LOG_DEBUG, "coding type: lpc\n"); + if (decode_subframe_lpc(s, channel, (type & ~0x20)+1) < 0) + return -1; + } + else + { + av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n"); + return -1; + } + + if (wasted) + { + int i; + for (i = 0; i < s->blocksize; i++) + s->decoded[channel][i] <<= wasted; + } + + return 0; +} + +static int decode_frame(FLACContext *s) +{ + int blocksize_code, sample_rate_code, sample_size_code, assignment, i, crc8; + int decorrelation, bps, blocksize, samplerate; + + blocksize_code = get_bits(&s->gb, 4); + + sample_rate_code = get_bits(&s->gb, 4); + + assignment = get_bits(&s->gb, 4); /* channel assignment */ + if (assignment < 8 && s->channels == assignment+1) + decorrelation = INDEPENDENT; + else if (assignment >=8 && assignment < 11 && s->channels == 2) + decorrelation = LEFT_SIDE + 
assignment - 8; + else + { + av_log(s->avctx, AV_LOG_ERROR, "unsupported channel assignment %d (channels=%d)\n", assignment, s->channels); + return -1; + } + + sample_size_code = get_bits(&s->gb, 3); + if(sample_size_code == 0) + bps= s->bps; + else if((sample_size_code != 3) && (sample_size_code != 7)) + bps = sample_size_table[sample_size_code]; + else + { + av_log(s->avctx, AV_LOG_ERROR, "invalid sample size code (%d)\n", sample_size_code); + return -1; + } + + if (get_bits1(&s->gb)) + { + av_log(s->avctx, AV_LOG_ERROR, "broken stream, invalid padding\n"); + return -1; + } + + if(get_utf8(&s->gb) < 0){ + av_log(s->avctx, AV_LOG_ERROR, "utf8 fscked\n"); + return -1; + } +#if 0 + if (/*((blocksize_code == 6) || (blocksize_code == 7)) &&*/ + (s->min_blocksize != s->max_blocksize)){ + }else{ + } +#endif + + if (blocksize_code == 0) + blocksize = s->min_blocksize; + else if (blocksize_code == 6) + blocksize = get_bits(&s->gb, 8)+1; + else if (blocksize_code == 7) + blocksize = get_bits(&s->gb, 16)+1; + else + blocksize = blocksize_table[blocksize_code]; + + if(blocksize > s->max_blocksize){ + av_log(s->avctx, AV_LOG_ERROR, "blocksize %d > %d\n", blocksize, s->max_blocksize); + return -1; + } + + if (sample_rate_code == 0){ + samplerate= s->samplerate; + }else if ((sample_rate_code > 3) && (sample_rate_code < 12)) + samplerate = sample_rate_table[sample_rate_code]; + else if (sample_rate_code == 12) + samplerate = get_bits(&s->gb, 8) * 1000; + else if (sample_rate_code == 13) + samplerate = get_bits(&s->gb, 16); + else if (sample_rate_code == 14) + samplerate = get_bits(&s->gb, 16) * 10; + else{ + av_log(s->avctx, AV_LOG_ERROR, "illegal sample rate code %d\n", sample_rate_code); + return -1; + } + + skip_bits(&s->gb, 8); + crc8= av_crc(av_crc07, 0, s->gb.buffer, get_bits_count(&s->gb)/8); + if(crc8){ + av_log(s->avctx, AV_LOG_ERROR, "header crc mismatch crc=%2X\n", crc8); + return -1; + } + + s->blocksize = blocksize; + s->samplerate = samplerate; + s->bps = bps; + 
s->decorrelation= decorrelation; + +// dump_headers(s); + + /* subframes */ + for (i = 0; i < s->channels; i++) + { +// av_log(s->avctx, AV_LOG_DEBUG, "decoded: %x residual: %x\n", s->decoded[i], s->residual[i]); + if (decode_subframe(s, i) < 0) + return -1; + } + + align_get_bits(&s->gb); + + /* frame footer */ + skip_bits(&s->gb, 16); /* data crc */ + + return 0; +} + +static int flac_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + FLACContext *s = avctx->priv_data; + int metadata_last, metadata_type, metadata_size; + int tmp = 0, i, j = 0, input_buf_size = 0; + int16_t *samples = data; + + if(s->max_framesize == 0){ + s->max_framesize= 65536; // should hopefully be enough for the first header + s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize); + } + + if(1 && s->max_framesize){//FIXME truncated + buf_size= FFMAX(FFMIN(buf_size, s->max_framesize - s->bitstream_size), 0); + input_buf_size= buf_size; + + if(s->bitstream_index + s->bitstream_size + buf_size > s->allocated_bitstream_size){ +// printf("memmove\n"); + memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size); + s->bitstream_index=0; + } + memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], buf, buf_size); + buf= &s->bitstream[s->bitstream_index]; + buf_size += s->bitstream_size; + s->bitstream_size= buf_size; + + if(buf_size < s->max_framesize){ +// printf("wanna more data ...\n"); + return input_buf_size; + } + } + + init_get_bits(&s->gb, buf, buf_size*8); + + /* fLaC signature (be) */ + if (show_bits_long(&s->gb, 32) == bswap_32(ff_get_fourcc("fLaC"))) + { + skip_bits(&s->gb, 32); + + av_log(s->avctx, AV_LOG_DEBUG, "STREAM HEADER\n"); + do { + metadata_last = get_bits(&s->gb, 1); + metadata_type = get_bits(&s->gb, 7); + metadata_size = get_bits_long(&s->gb, 24); + + av_log(s->avctx, AV_LOG_DEBUG, " metadata block: flag = %d, type = %d, size = %d\n", + metadata_last, 
metadata_type, + metadata_size); + if(metadata_size){ + switch(metadata_type) + { + case METADATA_TYPE_STREAMINFO:{ + metadata_streaminfo(s); + + /* Buffer might have been reallocated, reinit bitreader */ + if(buf != &s->bitstream[s->bitstream_index]) + { + int bits_count = get_bits_count(&s->gb); + buf= &s->bitstream[s->bitstream_index]; + init_get_bits(&s->gb, buf, buf_size*8); + skip_bits(&s->gb, bits_count); + } + + dump_headers(s); + break;} + default: + for(i=0; igb, 8); + } + } + } while(!metadata_last); + } + else + { + + tmp = show_bits(&s->gb, 16); + if(tmp != 0xFFF8){ + av_log(s->avctx, AV_LOG_ERROR, "FRAME HEADER not here\n"); + while(get_bits_count(&s->gb)/8+2 < buf_size && show_bits(&s->gb, 16) != 0xFFF8) + skip_bits(&s->gb, 8); + goto end; // we may not have enough bits left to decode a frame, so try next time + } + skip_bits(&s->gb, 16); + if (decode_frame(s) < 0){ + av_log(s->avctx, AV_LOG_ERROR, "decode_frame() failed\n"); + s->bitstream_size=0; + s->bitstream_index=0; + return -1; + } + } + + +#if 0 + /* fix the channel order here */ + if (s->order == MID_SIDE) + { + short *left = samples; + short *right = samples + s->blocksize; + for (i = 0; i < s->blocksize; i += 2) + { + uint32_t x = s->decoded[0][i]; + uint32_t y = s->decoded[0][i+1]; + + right[i] = x - (y / 2); + left[i] = right[i] + y; + } + *data_size = 2 * s->blocksize; + } + else + { + for (i = 0; i < s->channels; i++) + { + switch(s->order) + { + case INDEPENDENT: + for (j = 0; j < s->blocksize; j++) + samples[(s->blocksize*i)+j] = s->decoded[i][j]; + break; + case LEFT_SIDE: + case RIGHT_SIDE: + if (i == 0) + for (j = 0; j < s->blocksize; j++) + samples[(s->blocksize*i)+j] = s->decoded[0][j]; + else + for (j = 0; j < s->blocksize; j++) + samples[(s->blocksize*i)+j] = s->decoded[0][j] - s->decoded[i][j]; + break; +// case MID_SIDE: +// av_log(s->avctx, AV_LOG_DEBUG, "mid-side unsupported\n"); + } + *data_size += s->blocksize; + } + } +#else + switch(s->decorrelation) + { + case 
INDEPENDENT: + for (j = 0; j < s->blocksize; j++) + { + for (i = 0; i < s->channels; i++) + *(samples++) = s->decoded[i][j]; + } + break; + case LEFT_SIDE: + assert(s->channels == 2); + for (i = 0; i < s->blocksize; i++) + { + *(samples++) = s->decoded[0][i]; + *(samples++) = s->decoded[0][i] - s->decoded[1][i]; + } + break; + case RIGHT_SIDE: + assert(s->channels == 2); + for (i = 0; i < s->blocksize; i++) + { + *(samples++) = s->decoded[0][i] + s->decoded[1][i]; + *(samples++) = s->decoded[1][i]; + } + break; + case MID_SIDE: + assert(s->channels == 2); + for (i = 0; i < s->blocksize; i++) + { + int mid, side; + mid = s->decoded[0][i]; + side = s->decoded[1][i]; + +#if 1 //needs to be checked but IMHO it should be binary identical + mid -= side>>1; + *(samples++) = mid + side; + *(samples++) = mid; +#else + + mid <<= 1; + if (side & 1) + mid++; + *(samples++) = (mid + side) >> 1; + *(samples++) = (mid - side) >> 1; +#endif + } + break; + } +#endif + + *data_size = (int8_t *)samples - (int8_t *)data; +// av_log(s->avctx, AV_LOG_DEBUG, "data size: %d\n", *data_size); + +// s->last_blocksize = s->blocksize; +end: + i= (get_bits_count(&s->gb)+7)/8;; + if(i > buf_size){ + av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", i - buf_size); + s->bitstream_size=0; + s->bitstream_index=0; + return -1; + } + + if(s->bitstream_size){ + s->bitstream_index += i; + s->bitstream_size -= i; + return input_buf_size; + }else + return i; +} + +static int flac_decode_close(AVCodecContext *avctx) +{ + FLACContext *s = avctx->priv_data; + int i; + + for (i = 0; i < s->channels; i++) + { + av_freep(&s->decoded[i]); + } + av_freep(&s->bitstream); + + return 0; +} + +static void flac_flush(AVCodecContext *avctx){ + FLACContext *s = avctx->priv_data; + + s->bitstream_size= + s->bitstream_index= 0; +} + +AVCodec flac_decoder = { + "flac", + CODEC_TYPE_AUDIO, + CODEC_ID_FLAC, + sizeof(FLACContext), + flac_decode_init, + NULL, + flac_decode_close, + flac_decode_frame, + .flush= flac_flush, +}; 
diff --git a/mpeg4/src/libavcodec/flicvideo.c b/mpeg4/src/libavcodec/flicvideo.c new file mode 100644 index 0000000000000000000000000000000000000000..fa128d0d3b6e905544e72b583771d46d04ae1082 --- /dev/null +++ b/mpeg4/src/libavcodec/flicvideo.c @@ -0,0 +1,744 @@ +/* + * FLI/FLC Animation Video Decoder + * Copyright (C) 2003, 2004 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file flicvideo.c + * Autodesk Animator FLI/FLC Video Decoder + * by Mike Melanson (melanson@pcisys.net) + * for more information on the .fli/.flc file format and all of its many + * variations, visit: + * http://www.compuphase.com/flic.htm + * + * This decoder outputs PAL8/RGB555/RGB565 and maybe one day RGB24 + * colorspace data, depending on the FLC. To use this decoder, be + * sure that your demuxer sends the FLI file header to the decoder via + * the extradata chunk in AVCodecContext. The chunk should be 128 bytes + * large. The only exception is for FLI files from the game "Magic Carpet", + * in which the header is only 12 bytes. 
+ */ + +#include +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "bswap.h" + +#define FLI_256_COLOR 4 +#define FLI_DELTA 7 +#define FLI_COLOR 11 +#define FLI_LC 12 +#define FLI_BLACK 13 +#define FLI_BRUN 15 +#define FLI_COPY 16 +#define FLI_MINI 18 +#define FLI_DTA_BRUN 25 +#define FLI_DTA_COPY 26 +#define FLI_DTA_LC 27 + +#define FLI_TYPE_CODE (0xAF11) +#define FLC_FLX_TYPE_CODE (0xAF12) +#define FLC_DTA_TYPE_CODE (0xAF44) /* Marks an "Extended FLC" comes from Dave's Targa Animator (DTA) */ +#define FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE (0xAF13) + +#define CHECK_PIXEL_PTR(n) \ + if (pixel_ptr + n > pixel_limit) { \ + av_log (s->avctx, AV_LOG_INFO, "Problem: pixel_ptr >= pixel_limit (%d >= %d)\n", \ + pixel_ptr + n, pixel_limit); \ + return -1; \ + } \ + +typedef struct FlicDecodeContext { + AVCodecContext *avctx; + AVFrame frame; + + unsigned int palette[256]; + int new_palette; + int fli_type; /* either 0xAF11 or 0xAF12, affects palette resolution */ +} FlicDecodeContext; + +static int flic_decode_init(AVCodecContext *avctx) +{ + FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data; + unsigned char *fli_header = (unsigned char *)avctx->extradata; + int depth; + + s->avctx = avctx; + avctx->has_b_frames = 0; + + s->fli_type = LE_16(&fli_header[4]); /* Might be overridden if a Magic Carpet FLC */ + depth = LE_16(&fli_header[12]); + + if (depth == 0) { + depth = 8; /* Some FLC generators set depth to zero, when they mean 8Bpp. 
Fix up here */ + } + + if (s->avctx->extradata_size == 12) { + /* special case for magic carpet FLIs */ + s->fli_type = FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE; + } else if (s->avctx->extradata_size != 128) { + av_log(avctx, AV_LOG_ERROR, "Expected extradata of 12 or 128 bytes\n"); + return -1; + } + + if ((s->fli_type == FLC_FLX_TYPE_CODE) && (depth == 16)) { + depth = 15; /* Original Autodesk FLX's say the depth is 16Bpp when it is really 15Bpp */ + } + + switch (depth) { + case 8 : avctx->pix_fmt = PIX_FMT_PAL8; break; + case 15 : avctx->pix_fmt = PIX_FMT_RGB555; break; + case 16 : avctx->pix_fmt = PIX_FMT_RGB565; break; + case 24 : avctx->pix_fmt = PIX_FMT_BGR24; /* Supposedly BGR, but havent any files to test with */ + av_log(avctx, AV_LOG_ERROR, "24Bpp FLC/FLX is unsupported due to no test files.\n"); + return -1; + break; + default : + av_log(avctx, AV_LOG_ERROR, "Unkown FLC/FLX depth of %d Bpp is unsupported.\n",depth); + return -1; + } + + s->frame.data[0] = NULL; + s->new_palette = 0; + + return 0; +} + +static int flic_decode_frame_8BPP(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data; + + int stream_ptr = 0; + int stream_ptr_after_color_chunk; + int pixel_ptr; + int palette_ptr; + unsigned char palette_idx1; + unsigned char palette_idx2; + + unsigned int frame_size; + int num_chunks; + + unsigned int chunk_size; + int chunk_type; + + int i, j; + + int color_packets; + int color_changes; + int color_shift; + unsigned char r, g, b; + + int lines; + int compressed_lines; + int starting_line; + signed short line_packets; + int y_ptr; + signed char byte_run; + int pixel_skip; + int pixel_countdown; + unsigned char *pixels; + int pixel_limit; + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, &s->frame) < 0) { + av_log(avctx, AV_LOG_ERROR, 
"reget_buffer() failed\n"); + return -1; + } + + pixels = s->frame.data[0]; + pixel_limit = s->avctx->height * s->frame.linesize[0]; + + frame_size = LE_32(&buf[stream_ptr]); + stream_ptr += 6; /* skip the magic number */ + num_chunks = LE_16(&buf[stream_ptr]); + stream_ptr += 10; /* skip padding */ + + frame_size -= 16; + + /* iterate through the chunks */ + while ((frame_size > 0) && (num_chunks > 0)) { + chunk_size = LE_32(&buf[stream_ptr]); + stream_ptr += 4; + chunk_type = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + + switch (chunk_type) { + case FLI_256_COLOR: + case FLI_COLOR: + stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6; + s->new_palette = 1; + + /* check special case: If this file is from the Magic Carpet + * game and uses 6-bit colors even though it reports 256-color + * chunks in a 0xAF12-type file (fli_type is set to 0xAF13 during + * initialization) */ + if ((chunk_type == FLI_256_COLOR) && (s->fli_type != FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE)) + color_shift = 0; + else + color_shift = 2; + /* set up the palette */ + color_packets = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + palette_ptr = 0; + for (i = 0; i < color_packets; i++) { + /* first byte is how many colors to skip */ + palette_ptr += buf[stream_ptr++]; + + /* next byte indicates how many entries to change */ + color_changes = buf[stream_ptr++]; + + /* if there are 0 color changes, there are actually 256 */ + if (color_changes == 0) + color_changes = 256; + + for (j = 0; j < color_changes; j++) { + + /* wrap around, for good measure */ + if ((unsigned)palette_ptr >= 256) + palette_ptr = 0; + + r = buf[stream_ptr++] << color_shift; + g = buf[stream_ptr++] << color_shift; + b = buf[stream_ptr++] << color_shift; + s->palette[palette_ptr++] = (r << 16) | (g << 8) | b; + } + } + + /* color chunks sometimes have weird 16-bit alignment issues; + * therefore, take the hardline approach and set the stream_ptr + * to the value calculated w.r.t. 
the size specified by the color + * chunk header */ + stream_ptr = stream_ptr_after_color_chunk; + + break; + + case FLI_DELTA: + y_ptr = 0; + compressed_lines = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + while (compressed_lines > 0) { + line_packets = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + if (line_packets < 0) { + line_packets = -line_packets; + y_ptr += line_packets * s->frame.linesize[0]; + } else { + compressed_lines--; + pixel_ptr = y_ptr; + pixel_countdown = s->avctx->width; + for (i = 0; i < line_packets; i++) { + /* account for the skip bytes */ + pixel_skip = buf[stream_ptr++]; + pixel_ptr += pixel_skip; + pixel_countdown -= pixel_skip; + byte_run = buf[stream_ptr++]; + if (byte_run < 0) { + byte_run = -byte_run; + palette_idx1 = buf[stream_ptr++]; + palette_idx2 = buf[stream_ptr++]; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++, pixel_countdown -= 2) { + pixels[pixel_ptr++] = palette_idx1; + pixels[pixel_ptr++] = palette_idx2; + } + } else { + CHECK_PIXEL_PTR(byte_run * 2); + for (j = 0; j < byte_run * 2; j++, pixel_countdown--) { + palette_idx1 = buf[stream_ptr++]; + pixels[pixel_ptr++] = palette_idx1; + } + } + } + + y_ptr += s->frame.linesize[0]; + } + } + break; + + case FLI_LC: + /* line compressed */ + starting_line = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + y_ptr = 0; + y_ptr += starting_line * s->frame.linesize[0]; + + compressed_lines = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + while (compressed_lines > 0) { + pixel_ptr = y_ptr; + pixel_countdown = s->avctx->width; + line_packets = buf[stream_ptr++]; + if (line_packets > 0) { + for (i = 0; i < line_packets; i++) { + /* account for the skip bytes */ + pixel_skip = buf[stream_ptr++]; + pixel_ptr += pixel_skip; + pixel_countdown -= pixel_skip; + byte_run = buf[stream_ptr++]; + if (byte_run > 0) { + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++, pixel_countdown--) { + palette_idx1 = buf[stream_ptr++]; + pixels[pixel_ptr++] = palette_idx1; + } + } else { 
+ byte_run = -byte_run; + palette_idx1 = buf[stream_ptr++]; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++, pixel_countdown--) { + pixels[pixel_ptr++] = palette_idx1; + } + } + } + } + + y_ptr += s->frame.linesize[0]; + compressed_lines--; + } + break; + + case FLI_BLACK: + /* set the whole frame to color 0 (which is usually black) */ + memset(pixels, 0, + s->frame.linesize[0] * s->avctx->height); + break; + + case FLI_BRUN: + /* Byte run compression: This chunk type only occurs in the first + * FLI frame and it will update the entire frame. */ + y_ptr = 0; + for (lines = 0; lines < s->avctx->height; lines++) { + pixel_ptr = y_ptr; + /* disregard the line packets; instead, iterate through all + * pixels on a row */ + stream_ptr++; + pixel_countdown = s->avctx->width; + while (pixel_countdown > 0) { + byte_run = buf[stream_ptr++]; + if (byte_run > 0) { + palette_idx1 = buf[stream_ptr++]; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + pixels[pixel_ptr++] = palette_idx1; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } else { /* copy bytes if byte_run < 0 */ + byte_run = -byte_run; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + palette_idx1 = buf[stream_ptr++]; + pixels[pixel_ptr++] = palette_idx1; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } + } + + y_ptr += s->frame.linesize[0]; + } + break; + + case FLI_COPY: + /* copy the chunk (uncompressed frame) */ + if (chunk_size - 6 > s->avctx->width * s->avctx->height) { + av_log(avctx, AV_LOG_ERROR, "In chunk FLI_COPY : source data (%d bytes) " \ + "bigger than image, skipping chunk\n", chunk_size - 6); + stream_ptr += chunk_size - 6; + } else { + for (y_ptr = 0; y_ptr < s->frame.linesize[0] * s->avctx->height; + y_ptr += s->frame.linesize[0]) { + memcpy(&pixels[y_ptr], &buf[stream_ptr], + 
s->avctx->width); + stream_ptr += s->avctx->width; + } + } + break; + + case FLI_MINI: + /* some sort of a thumbnail? disregard this chunk... */ + stream_ptr += chunk_size - 6; + break; + + default: + av_log(avctx, AV_LOG_ERROR, "Unrecognized chunk type: %d\n", chunk_type); + break; + } + + frame_size -= chunk_size; + num_chunks--; + } + + /* by the end of the chunk, the stream ptr should equal the frame + * size (minus 1, possibly); if it doesn't, issue a warning */ + if ((stream_ptr != buf_size) && (stream_ptr != buf_size - 1)) + av_log(avctx, AV_LOG_ERROR, "Processed FLI chunk where chunk size = %d " \ + "and final chunk ptr = %d\n", buf_size, stream_ptr); + + /* make the palette available on the way out */ +// if (s->new_palette) { + if (1) { + memcpy(s->frame.data[1], s->palette, AVPALETTE_SIZE); + s->frame.palette_has_changed = 1; + s->new_palette = 0; + } + + *data_size=sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + return buf_size; +} + +static int flic_decode_frame_15_16BPP(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + /* Note, the only difference between the 15Bpp and 16Bpp */ + /* Format is the pixel format, the packets are processed the same. 
*/ + FlicDecodeContext *s = (FlicDecodeContext *)avctx->priv_data; + + int stream_ptr = 0; + int pixel_ptr; + unsigned char palette_idx1; + + unsigned int frame_size; + int num_chunks; + + unsigned int chunk_size; + int chunk_type; + + int i, j; + + int lines; + int compressed_lines; + signed short line_packets; + int y_ptr; + signed char byte_run; + int pixel_skip; + int pixel_countdown; + unsigned char *pixels; + int pixel; + int pixel_limit; + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, &s->frame) < 0) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + + pixels = s->frame.data[0]; + pixel_limit = s->avctx->height * s->frame.linesize[0]; + + frame_size = LE_32(&buf[stream_ptr]); + stream_ptr += 6; /* skip the magic number */ + num_chunks = LE_16(&buf[stream_ptr]); + stream_ptr += 10; /* skip padding */ + + frame_size -= 16; + + /* iterate through the chunks */ + while ((frame_size > 0) && (num_chunks > 0)) { + chunk_size = LE_32(&buf[stream_ptr]); + stream_ptr += 4; + chunk_type = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + + switch (chunk_type) { + case FLI_256_COLOR: + case FLI_COLOR: + /* For some reason, it seems that non-paletised flics do include one of these */ + /* chunks in their first frame. 
Why i do not know, it seems rather extraneous */ +/* av_log(avctx, AV_LOG_ERROR, "Unexpected Palette chunk %d in non-paletised FLC\n",chunk_type);*/ + stream_ptr = stream_ptr + chunk_size - 6; + break; + + case FLI_DELTA: + case FLI_DTA_LC: + y_ptr = 0; + compressed_lines = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + while (compressed_lines > 0) { + line_packets = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + if (line_packets < 0) { + line_packets = -line_packets; + y_ptr += line_packets * s->frame.linesize[0]; + } else { + compressed_lines--; + pixel_ptr = y_ptr; + pixel_countdown = s->avctx->width; + for (i = 0; i < line_packets; i++) { + /* account for the skip bytes */ + pixel_skip = buf[stream_ptr++]; + pixel_ptr += (pixel_skip*2); /* Pixel is 2 bytes wide */ + pixel_countdown -= pixel_skip; + byte_run = buf[stream_ptr++]; + if (byte_run < 0) { + byte_run = -byte_run; + pixel = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++, pixel_countdown -= 2) { + *((signed short*)(&pixels[pixel_ptr])) = pixel; + pixel_ptr += 2; + } + } else { + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++, pixel_countdown--) { + *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + pixel_ptr += 2; + } + } + } + + y_ptr += s->frame.linesize[0]; + } + } + break; + + case FLI_LC: + av_log(avctx, AV_LOG_ERROR, "Unexpected FLI_LC chunk in non-paletised FLC\n"); + stream_ptr = stream_ptr + chunk_size - 6; + break; + + case FLI_BLACK: + /* set the whole frame to 0x0000 which is balck in both 15Bpp and 16Bpp modes. 
*/ + memset(pixels, 0x0000, + s->frame.linesize[0] * s->avctx->height * 2); + break; + + case FLI_BRUN: + y_ptr = 0; + for (lines = 0; lines < s->avctx->height; lines++) { + pixel_ptr = y_ptr; + /* disregard the line packets; instead, iterate through all + * pixels on a row */ + stream_ptr++; + pixel_countdown = (s->avctx->width * 2); + + while (pixel_countdown > 0) { + byte_run = buf[stream_ptr++]; + if (byte_run > 0) { + palette_idx1 = buf[stream_ptr++]; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + pixels[pixel_ptr++] = palette_idx1; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } else { /* copy bytes if byte_run < 0 */ + byte_run = -byte_run; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + palette_idx1 = buf[stream_ptr++]; + pixels[pixel_ptr++] = palette_idx1; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } + } + + /* Now FLX is strange, in that it is "byte" as opposed to "pixel" run length compressed. + * This doesnt give us any good oportunity to perform word endian conversion + * during decompression. So if its requried (ie, this isnt a LE target, we do + * a second pass over the line here, swapping the bytes. 
+ */ + pixel = 0xFF00; + if (0xFF00 != LE_16(&pixel)) /* Check if its not an LE Target */ + { + pixel_ptr = y_ptr; + pixel_countdown = s->avctx->width; + while (pixel_countdown > 0) { + *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[pixel_ptr]); + pixel_ptr += 2; + } + } + y_ptr += s->frame.linesize[0]; + } + break; + + case FLI_DTA_BRUN: + y_ptr = 0; + for (lines = 0; lines < s->avctx->height; lines++) { + pixel_ptr = y_ptr; + /* disregard the line packets; instead, iterate through all + * pixels on a row */ + stream_ptr++; + pixel_countdown = s->avctx->width; /* Width is in pixels, not bytes */ + + while (pixel_countdown > 0) { + byte_run = buf[stream_ptr++]; + if (byte_run > 0) { + pixel = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + *((signed short*)(&pixels[pixel_ptr])) = pixel; + pixel_ptr += 2; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } else { /* copy pixels if byte_run < 0 */ + byte_run = -byte_run; + CHECK_PIXEL_PTR(byte_run); + for (j = 0; j < byte_run; j++) { + *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[stream_ptr]); + stream_ptr += 2; + pixel_ptr += 2; + pixel_countdown--; + if (pixel_countdown < 0) + av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n", + pixel_countdown); + } + } + } + + y_ptr += s->frame.linesize[0]; + } + break; + + case FLI_COPY: + case FLI_DTA_COPY: + /* copy the chunk (uncompressed frame) */ + if (chunk_size - 6 > (unsigned int)(s->avctx->width * s->avctx->height)*2) { + av_log(avctx, AV_LOG_ERROR, "In chunk FLI_COPY : source data (%d bytes) " \ + "bigger than image, skipping chunk\n", chunk_size - 6); + stream_ptr += chunk_size - 6; + } else { + + for (y_ptr = 0; y_ptr < s->frame.linesize[0] * s->avctx->height; + y_ptr += s->frame.linesize[0]) { + + pixel_countdown = s->avctx->width; + pixel_ptr = 0; + while (pixel_countdown > 0) { + *((signed 
short*)(&pixels[y_ptr + pixel_ptr])) = LE_16(&buf[stream_ptr+pixel_ptr]); + pixel_ptr += 2; + pixel_countdown--; + } + stream_ptr += s->avctx->width*2; + } + } + break; + + case FLI_MINI: + /* some sort of a thumbnail? disregard this chunk... */ + stream_ptr += chunk_size - 6; + break; + + default: + av_log(avctx, AV_LOG_ERROR, "Unrecognized chunk type: %d\n", chunk_type); + break; + } + + frame_size -= chunk_size; + num_chunks--; + } + + /* by the end of the chunk, the stream ptr should equal the frame + * size (minus 1, possibly); if it doesn't, issue a warning */ + if ((stream_ptr != buf_size) && (stream_ptr != buf_size - 1)) + av_log(avctx, AV_LOG_ERROR, "Processed FLI chunk where chunk size = %d " \ + "and final chunk ptr = %d\n", buf_size, stream_ptr); + + + *data_size=sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + return buf_size; +} + +static int flic_decode_frame_24BPP(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + av_log(avctx, AV_LOG_ERROR, "24Bpp FLC Unsupported due to lack of test files.\n"); + return -1; +} + +static int flic_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + if (avctx->pix_fmt == PIX_FMT_PAL8) { + return flic_decode_frame_8BPP(avctx, data, data_size, + buf, buf_size); + } + else if ((avctx->pix_fmt == PIX_FMT_RGB555) || + (avctx->pix_fmt == PIX_FMT_RGB565)) { + return flic_decode_frame_15_16BPP(avctx, data, data_size, + buf, buf_size); + } + else if (avctx->pix_fmt == PIX_FMT_BGR24) { + return flic_decode_frame_24BPP(avctx, data, data_size, + buf, buf_size); + } + + /* Shouldnt get here, ever as the pix_fmt is processed */ + /* in flic_decode_init and the above if should deal with */ + /* the finite set of possibilites allowable by here. */ + /* but in case we do, just error out. */ + av_log(avctx, AV_LOG_ERROR, "Unknown Format of FLC. 
My Science cant explain how this happened\n"); + return -1; +} + + +static int flic_decode_end(AVCodecContext *avctx) +{ + FlicDecodeContext *s = avctx->priv_data; + + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + +AVCodec flic_decoder = { + "flic", + CODEC_TYPE_VIDEO, + CODEC_ID_FLIC, + sizeof(FlicDecodeContext), + flic_decode_init, + NULL, + flic_decode_end, + flic_decode_frame, + CODEC_CAP_DR1, + NULL, + NULL, + NULL, + NULL +}; diff --git a/mpeg4/src/libavcodec/fraps.c b/mpeg4/src/libavcodec/fraps.c new file mode 100644 index 0000000000000000000000000000000000000000..d107e47b117eeb10a0be65c7afd0a1eb171b24a3 --- /dev/null +++ b/mpeg4/src/libavcodec/fraps.c @@ -0,0 +1,248 @@ +/* + * Fraps FPS1 decoder + * Copyright (c) 2005 Roine Gustafsson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file fraps.c + * Lossless Fraps 'FPS1' decoder + * @author Roine Gustafsson + * + * Only decodes version 0 and 1 files. + * Codec algorithm for version 0 is taken from Transcode + * + * Version 2 files, which are the most commonly found Fraps files, cannot be + * decoded yet. 
+ */ + +#include "avcodec.h" + +#define FPS_TAG MKTAG('F', 'P', 'S', 'x') + +/** + * local variable storage + */ +typedef struct FrapsContext{ + AVCodecContext *avctx; + AVFrame frame; +} FrapsContext; + + +/** + * initializes decoder + * @param avctx codec context + * @return 0 on success or negative if fails + */ +static int decode_init(AVCodecContext *avctx) +{ + FrapsContext * const s = avctx->priv_data; + + avctx->coded_frame = (AVFrame*)&s->frame; + avctx->has_b_frames = 0; + avctx->pix_fmt= PIX_FMT_NONE; /* set in decode_frame */ + + s->avctx = avctx; + s->frame.data[0] = NULL; + + return 0; +} + + +/** + * decode a frame + * @param avctx codec context + * @param data output AVFrame + * @param data_size size of output data or 0 if no picture is returned + * @param buf input data frame + * @param buf_size size of input data frame + * @return number of consumed bytes on success or negative if decode fails + */ +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + FrapsContext * const s = avctx->priv_data; + AVFrame *frame = data; + AVFrame * const f = (AVFrame*)&s->frame; + uint32_t header; + unsigned int version,header_size; + unsigned int x, y; + uint32_t *buf32; + uint32_t *luma1,*luma2,*cb,*cr; + + + header = LE_32(buf); + version = header & 0xff; + header_size = (header & (1<<30))? 8 : 4; /* bit 30 means pad to 8 bytes */ + + if (version > 1) { + av_log(avctx, AV_LOG_ERROR, + "This file is encoded with Fraps version %d. 
" \ + "This codec can only decode version 0 and 1.\n", version); + return -1; + } + + buf+=4; + if (header_size == 8) + buf+=4; + + switch(version) { + case 0: + default: + /* Fraps v0 is a reordered YUV420 */ + avctx->pix_fmt = PIX_FMT_YUV420P; + + if ( (buf_size != avctx->width*avctx->height*3/2+header_size) && + (buf_size != header_size) ) { + av_log(avctx, AV_LOG_ERROR, + "Invalid frame length %d (should be %d)\n", + buf_size, avctx->width*avctx->height*3/2+header_size); + return -1; + } + + if (( (avctx->width % 8) != 0) || ( (avctx->height % 2) != 0 )) { + av_log(avctx, AV_LOG_ERROR, "Invalid frame size %dx%d\n", + avctx->width, avctx->height); + return -1; + } + + f->reference = 1; + f->buffer_hints = FF_BUFFER_HINTS_VALID | + FF_BUFFER_HINTS_PRESERVE | + FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, f)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + /* bit 31 means same as previous pic */ + f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE; + f->key_frame = f->pict_type == FF_I_TYPE; + + if (f->pict_type == FF_I_TYPE) { + buf32=(uint32_t*)buf; + for(y=0; yheight/2; y++){ + luma1=(uint32_t*)&f->data[0][ y*2*f->linesize[0] ]; + luma2=(uint32_t*)&f->data[0][ (y*2+1)*f->linesize[0] ]; + cr=(uint32_t*)&f->data[1][ y*f->linesize[1] ]; + cb=(uint32_t*)&f->data[2][ y*f->linesize[2] ]; + for(x=0; xwidth; x+=8){ + *(luma1++) = *(buf32++); + *(luma1++) = *(buf32++); + *(luma2++) = *(buf32++); + *(luma2++) = *(buf32++); + *(cr++) = *(buf32++); + *(cb++) = *(buf32++); + } + } + } + break; + + case 1: + /* Fraps v1 is an upside-down BGR24 */ + avctx->pix_fmt = PIX_FMT_BGR24; + + if ( (buf_size != avctx->width*avctx->height*3+header_size) && + (buf_size != header_size) ) { + av_log(avctx, AV_LOG_ERROR, + "Invalid frame length %d (should be %d)\n", + buf_size, avctx->width*avctx->height*3+header_size); + return -1; + } + + f->reference = 1; + f->buffer_hints = FF_BUFFER_HINTS_VALID | + FF_BUFFER_HINTS_PRESERVE | + 
FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, f)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + /* bit 31 means same as previous pic */ + f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE; + f->key_frame = f->pict_type == FF_I_TYPE; + + if (f->pict_type == FF_I_TYPE) { + for(y=0; yheight; y++) + memcpy(&f->data[0][ (avctx->height-y)*f->linesize[0] ], + &buf[y*avctx->width*3], + f->linesize[0]); + } + break; + + case 2: + /** + * Fraps v2 sub-header description. All numbers are little-endian: + * (this is all guesswork) + * + * 0: DWORD 'FPSx' + * 4: DWORD 0x00000010 unknown, perhaps flags + * 8: DWORD off_2 offset to plane 2 + * 12: DWORD off_3 offset to plane 3 + * 16: 256xDWORD freqtbl_1 frequency table for plane 1 + * 1040: plane_1 + * ... + * off_2: 256xDWORD freqtbl_2 frequency table for plane 2 + * plane_2 + * ... + * off_3: 256xDWORD freqtbl_3 frequency table for plane 3 + * plane_3 + */ + if ((BE_32(buf) != FPS_TAG)||(buf_size < (3*1024 + 8))) { + av_log(avctx, AV_LOG_ERROR, "Fraps: error in data stream\n"); + return -1; + } + + /* NOT FINISHED */ + + break; + } + + *frame = *f; + *data_size = sizeof(AVFrame); + + return buf_size; +} + + +/** + * closes decoder + * @param avctx codec context + * @return 0 on success or negative if fails + */ +static int decode_end(AVCodecContext *avctx) +{ + FrapsContext *s = (FrapsContext*)avctx->priv_data; + + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + + +AVCodec fraps_decoder = { + "fraps", + CODEC_TYPE_VIDEO, + CODEC_ID_FRAPS, + sizeof(FrapsContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/g726.c b/mpeg4/src/libavcodec/g726.c new file mode 100644 index 0000000000000000000000000000000000000000..8114fe0f324bdfbea25ea6051a97bc29c9742dae --- /dev/null +++ b/mpeg4/src/libavcodec/g726.c @@ -0,0 +1,424 @@ +/* + * G.726 ADPCM audio codec + * Copyright (c) 2004 Roman 
Shaposhnik. + * + * This is a very straightforward rendition of the G.726 + * Section 4 "Computational Details". + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include "avcodec.h" +#include "common.h" +#include "bitstream.h" + +/** + * G.726 11bit float. + * G.726 Standard uses rather odd 11bit floating point arithmentic for + * numerous occasions. It's a mistery to me why they did it this way + * instead of simply using 32bit integer arithmetic. + */ +typedef struct Float11 { + int sign; /**< 1bit sign */ + int exp; /**< 4bit exponent */ + int mant; /**< 6bit mantissa */ +} Float11; + +static inline Float11* i2f(int16_t i, Float11* f) +{ + f->sign = (i < 0); + if (f->sign) + i = -i; + f->exp = av_log2_16bit(i) + !!i; + f->mant = i? (i<<6) >> f->exp : + 1<<5; + return f; +} + +static inline int16_t mult(Float11* f1, Float11* f2) +{ + int res, exp; + + exp = f1->exp + f2->exp; + res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7; + res = exp > 26 ? res << (exp - 26) : res >> (26 - exp); + return (f1->sign ^ f2->sign) ? -res : res; +} + +static inline int sgn(int value) +{ + return (value < 0) ? 
-1 : 1; +} + +typedef struct G726Tables { + int bits; /**< bits per sample */ + int* quant; /**< quantization table */ + int* iquant; /**< inverse quantization table */ + int* W; /**< special table #1 ;-) */ + int* F; /**< special table #2 */ +} G726Tables; + +typedef struct G726Context { + G726Tables* tbls; /**< static tables needed for computation */ + + Float11 sr[2]; /**< prev. reconstructed samples */ + Float11 dq[6]; /**< prev. difference */ + int a[2]; /**< second order predictor coeffs */ + int b[6]; /**< sixth order predictor coeffs */ + int pk[2]; /**< signs of prev. 2 sez + dq */ + + int ap; /**< scale factor control */ + int yu; /**< fast scale factor */ + int yl; /**< slow scale factor */ + int dms; /**< short average magnitude of F[i] */ + int dml; /**< long average magnitude of F[i] */ + int td; /**< tone detect */ + + int se; /**< estimated signal for the next iteration */ + int sez; /**< estimated second order prediction */ + int y; /**< quantizer scaling factor for the next iteration */ +} G726Context; + +static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */ + { 260, INT_MAX }; +static int iquant_tbl16[] = + { 116, 365, 365, 116 }; +static int W_tbl16[] = + { -22, 439, 439, -22 }; +static int F_tbl16[] = + { 0, 7, 7, 0 }; + +static int quant_tbl24[] = /**< 24kbit/s 3bits per sample */ + { 7, 217, 330, INT_MAX }; +static int iquant_tbl24[] = + { INT_MIN, 135, 273, 373, 373, 273, 135, INT_MIN }; +static int W_tbl24[] = + { -4, 30, 137, 582, 582, 137, 30, -4 }; +static int F_tbl24[] = + { 0, 1, 2, 7, 7, 2, 1, 0 }; + +static int quant_tbl32[] = /**< 32kbit/s 4bits per sample */ + { -125, 79, 177, 245, 299, 348, 399, INT_MAX }; +static int iquant_tbl32[] = + { INT_MIN, 4, 135, 213, 273, 323, 373, 425, + 425, 373, 323, 273, 213, 135, 4, INT_MIN }; +static int W_tbl32[] = + { -12, 18, 41, 64, 112, 198, 355, 1122, + 1122, 355, 198, 112, 64, 41, 18, -12}; +static int F_tbl32[] = + { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 }; + +static int 
quant_tbl40[] = /**< 40kbit/s 5bits per sample */ + { -122, -16, 67, 138, 197, 249, 297, 338, + 377, 412, 444, 474, 501, 527, 552, INT_MAX }; +static int iquant_tbl40[] = + { INT_MIN, -66, 28, 104, 169, 224, 274, 318, + 358, 395, 429, 459, 488, 514, 539, 566, + 566, 539, 514, 488, 459, 429, 395, 358, + 318, 274, 224, 169, 104, 28, -66, INT_MIN }; +static int W_tbl40[] = + { 14, 14, 24, 39, 40, 41, 58, 100, + 141, 179, 219, 280, 358, 440, 529, 696, + 696, 529, 440, 358, 280, 219, 179, 141, + 100, 58, 41, 40, 39, 24, 14, 14 }; +static int F_tbl40[] = + { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6, + 6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; + +static G726Tables G726Tables_pool[] = + {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 }, + { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 }, + { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 }, + { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }}; + + +/** + * Para 4.2.2 page 18: Adaptive quantizer. + */ +static inline uint8_t quant(G726Context* c, int d) +{ + int sign, exp, i, dln; + + sign = i = 0; + if (d < 0) { + sign = 1; + d = -d; + } + exp = av_log2_16bit(d); + dln = ((exp<<7) + (((d<<7)>>exp)&0x7f)) - (c->y>>2); + + while (c->tbls->quant[i] < INT_MAX && c->tbls->quant[i] < dln) + ++i; + + if (sign) + i = ~i; + if (c->tbls->bits != 2 && i == 0) /* I'm not sure this is a good idea */ + i = 0xff; + + return i; +} + +/** + * Para 4.2.3 page 22: Inverse adaptive quantizer. + */ +static inline int16_t inverse_quant(G726Context* c, int i) +{ + int dql, dex, dqt; + + dql = c->tbls->iquant[i] + (c->y >> 2); + dex = (dql>>7) & 0xf; /* 4bit exponent */ + dqt = (1<<7) + (dql & 0x7f); /* log2 -> linear */ + return (dql < 0) ? 
0 : ((dqt<<7) >> (14-dex)); +} + +static inline int16_t g726_iterate(G726Context* c, int16_t I) +{ + int dq, re_signal, pk0, fa1, i, tr, ylint, ylfrac, thr2, al, dq0; + Float11 f; + + dq = inverse_quant(c, I); + if (I >> (c->tbls->bits - 1)) /* get the sign */ + dq = -dq; + re_signal = c->se + dq; + + /* Transition detect */ + ylint = (c->yl >> 15); + ylfrac = (c->yl >> 10) & 0x1f; + thr2 = (ylint > 9) ? 0x1f << 10 : (0x20 + ylfrac) << ylint; + if (c->td == 1 && abs(dq) > ((thr2+(thr2>>1))>>1)) + tr = 1; + else + tr = 0; + + /* Update second order predictor coefficient A2 and A1 */ + pk0 = (c->sez + dq) ? sgn(c->sez + dq) : 0; + dq0 = dq ? sgn(dq) : 0; + if (tr) { + c->a[0] = 0; + c->a[1] = 0; + for (i=0; i<6; i++) + c->b[i] = 0; + } else { + /* This is a bit crazy, but it really is +255 not +256 */ + fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255); + + c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7); + c->a[1] = clip(c->a[1], -12288, 12288); + c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8); + c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]); + + for (i=0; i<6; i++) + c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8); + } + + /* Update Dq and Sr and Pk */ + c->pk[1] = c->pk[0]; + c->pk[0] = pk0 ? pk0 : 1; + c->sr[1] = c->sr[0]; + i2f(re_signal, &c->sr[0]); + for (i=5; i>0; i--) + c->dq[i] = c->dq[i-1]; + i2f(dq, &c->dq[0]); + c->dq[0].sign = I >> (c->tbls->bits - 1); /* Isn't it crazy ?!?! 
*/ + + /* Update tone detect [I'm not sure 'tr == 0' is really needed] */ + c->td = (tr == 0 && c->a[1] < -11776); + + /* Update Ap */ + c->dms += ((c->tbls->F[I]<<9) - c->dms) >> 5; + c->dml += ((c->tbls->F[I]<<11) - c->dml) >> 7; + if (tr) + c->ap = 256; + else if (c->y > 1535 && !c->td && (abs((c->dms << 2) - c->dml) < (c->dml >> 3))) + c->ap += (-c->ap) >> 4; + else + c->ap += (0x200 - c->ap) >> 4; + + /* Update Yu and Yl */ + c->yu = clip(c->y + (((c->tbls->W[I] << 5) - c->y) >> 5), 544, 5120); + c->yl += c->yu + ((-c->yl)>>6); + + /* Next iteration for Y */ + al = (c->ap >= 256) ? 1<<6 : c->ap >> 2; + c->y = (c->yl + (c->yu - (c->yl>>6))*al) >> 6; + + /* Next iteration for SE and SEZ */ + c->se = 0; + for (i=0; i<6; i++) + c->se += mult(i2f(c->b[i] >> 2, &f), &c->dq[i]); + c->sez = c->se >> 1; + for (i=0; i<2; i++) + c->se += mult(i2f(c->a[i] >> 2, &f), &c->sr[i]); + c->se >>= 1; + + return clip(re_signal << 2, -0xffff, 0xffff); +} + +static int g726_reset(G726Context* c, int bit_rate) +{ + int i; + + c->tbls = &G726Tables_pool[bit_rate/8000 - 2]; + for (i=0; i<2; i++) { + i2f(0, &c->sr[i]); + c->a[i] = 0; + c->pk[i] = 1; + } + for (i=0; i<6; i++) { + i2f(0, &c->dq[i]); + c->b[i] = 0; + } + c->ap = 0; + c->dms = 0; + c->dml = 0; + c->yu = 544; + c->yl = 34816; + c->td = 0; + + c->se = 0; + c->sez = 0; + c->y = 544; + + return 0; +} + +static int16_t g726_decode(G726Context* c, int16_t i) +{ + return g726_iterate(c, i); +} + +static int16_t g726_encode(G726Context* c, int16_t sig) +{ + uint8_t i; + + i = quant(c, sig/4 - c->se) & ((1<tbls->bits) - 1); + g726_iterate(c, i); + return i; +} + +/* Interfacing to the libavcodec */ + +typedef struct AVG726Context { + G726Context c; + int bits_left; + int bit_buffer; + int code_size; +} AVG726Context; + +static int g726_init(AVCodecContext * avctx) +{ + AVG726Context* c = (AVG726Context*)avctx->priv_data; + + if (avctx->channels != 1 || + (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 && + avctx->bit_rate != 
32000 && avctx->bit_rate != 40000)) { + av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); + return -1; + } + if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) { + av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n"); + return -1; + } + g726_reset(&c->c, avctx->bit_rate); + c->code_size = c->c.tbls->bits; + c->bit_buffer = 0; + c->bits_left = 0; + + avctx->coded_frame = avcodec_alloc_frame(); + if (!avctx->coded_frame) + return -ENOMEM; + avctx->coded_frame->key_frame = 1; + + return 0; +} + +static int g726_close(AVCodecContext *avctx) +{ + av_freep(&avctx->coded_frame); + return 0; +} + +static int g726_encode_frame(AVCodecContext *avctx, + uint8_t *dst, int buf_size, void *data) +{ + AVG726Context *c = avctx->priv_data; + short *samples = data; + PutBitContext pb; + + init_put_bits(&pb, dst, 1024*1024); + + for (; buf_size; buf_size--) + put_bits(&pb, c->code_size, g726_encode(&c->c, *samples++)); + + flush_put_bits(&pb); + + return put_bits_count(&pb)>>3; +} + +static int g726_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AVG726Context *c = avctx->priv_data; + short *samples = data; + uint8_t code; + uint8_t mask; + GetBitContext gb; + + if (!buf_size) + goto out; + + mask = (1<code_size) - 1; + init_get_bits(&gb, buf, buf_size * 8); + if (c->bits_left) { + int s = c->code_size - c->bits_left;; + code = (c->bit_buffer << s) | get_bits(&gb, s); + *samples++ = g726_decode(&c->c, code & mask); + } + + while (get_bits_count(&gb) + c->code_size <= buf_size*8) + *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask); + + c->bits_left = buf_size*8 - get_bits_count(&gb); + c->bit_buffer = get_bits(&gb, c->bits_left); + +out: + *data_size = (uint8_t*)samples - (uint8_t*)data; + return buf_size; +} + +#ifdef CONFIG_ENCODERS +AVCodec adpcm_g726_encoder = { + "g726", + CODEC_TYPE_AUDIO, + CODEC_ID_ADPCM_G726, + sizeof(AVG726Context), + 
g726_init, + g726_encode_frame, + g726_close, + NULL, +}; +#endif //CONFIG_ENCODERS + +AVCodec adpcm_g726_decoder = { + "g726", + CODEC_TYPE_AUDIO, + CODEC_ID_ADPCM_G726, + sizeof(AVG726Context), + g726_init, + NULL, + g726_close, + g726_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/golomb.c b/mpeg4/src/libavcodec/golomb.c new file mode 100644 index 0000000000000000000000000000000000000000..c140b8b07d58b13151cb0e521446e23479ceafe3 --- /dev/null +++ b/mpeg4/src/libavcodec/golomb.c @@ -0,0 +1,154 @@ +/* + * exp golomb vlc stuff + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file golomb.c + * @brief + * exp golomb vlc stuff + * @author Michael Niedermayer + */ + +#include "common.h" + +const uint8_t ff_golomb_vlc_len[512]={ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, +7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +}; + +const uint8_t ff_ue_golomb_vlc_code[512]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, + 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +const int8_t ff_se_golomb_vlc_code[512]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, -8, 9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15, + 4, 4, 4, 4, -4, -4, -4, -4, 5, 5, 5, 5, -5, -5, -5, -5, 6, 6, 6, 6, -6, -6, -6, -6, 7, 7, 7, 7, -7, -7, -7, -7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + + +const uint8_t ff_ue_golomb_len[256]={ + 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11, +11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13, +13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, +15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17, +}; + +const uint8_t ff_interleaved_golomb_vlc_len[256]={ +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +}; + +const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={ + 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3, + 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5, + 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int8_t ff_interleaved_se_golomb_vlc_code[256]={ + 8, -8, 4, 4, 9, -9, -4, -4, 2, 2, 2, 2, 2, 2, 2, 2, + 10,-10, 5, 5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12,-12, 6, 6, 13,-13, -6, -6, 3, 3, 3, 3, 3, 3, 3, 3, + 14,-14, 7, 7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; diff --git a/mpeg4/src/libavcodec/golomb.h b/mpeg4/src/libavcodec/golomb.h new file mode 100644 index 0000000000000000000000000000000000000000..ef74f15c6602bf629306508fd67163794fbf7f9a --- /dev/null +++ b/mpeg4/src/libavcodec/golomb.h @@ -0,0 +1,469 @@ +/* + * exp golomb vlc stuff + * Copyright (c) 2003 Michael Niedermayer + * Copyright (c) 2004 Alex Beregszaszi + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file golomb.h + * @brief + * exp golomb vlc stuff + * @author Michael Niedermayer and Alex Beregszaszi + */ + +#define INVALID_VLC 0x80000000 + +extern const uint8_t ff_golomb_vlc_len[512]; +extern const uint8_t ff_ue_golomb_vlc_code[512]; +extern const int8_t ff_se_golomb_vlc_code[512]; +extern const uint8_t ff_ue_golomb_len[256]; + +extern const uint8_t ff_interleaved_golomb_vlc_len[256]; +extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256]; +extern const int8_t ff_interleaved_se_golomb_vlc_code[256]; + + + /** + * read unsigned exp golomb code. 
+ */
+static inline int get_ue_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf >= (1<<27)){ /* some bit among the top 5 cache bits is set: use the lookup tables */
+        buf >>= 32 - 9; /* the top 9 bits index the 512-entry tables */
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]); /* table gives the number of bits to consume */
+        CLOSE_READER(re, gb);
+
+        return ff_ue_golomb_vlc_code[buf];
+    }else{ /* top 5 cache bits are all zero: decode arithmetically.
+            * NOTE(review): buf == 0 is not special-cased on this path;
+            * confirm what av_log2(0) yields and whether callers can hit it. */
+        log= 2*av_log2(buf) - 31; /* shift amount; presumably av_log2() is the index of the highest set bit -- confirm */
+        buf>>= log;
+        buf--; /* returned value is the extracted bit string minus one */
+        LAST_SKIP_BITS(re, gb, 32 - log); /* consumes 32 - log bits */
+        CLOSE_READER(re, gb);
+
+        return buf;
+    }
+}
+/**
+ * read unsigned exp golomb code stored in the bit-interleaved layout that
+ * the ff_interleaved_* tables describe (the svq3_ prefix suggests SVQ3 is
+ * the user -- not visible from here).
+ * @return the decoded value, or INVALID_VLC when, after the refill, none of
+ *         the bits selected by the mask 0xAAAAAAAA is set. Note that the
+ *         INVALID_VLC path returns without CLOSE_READER.
+ */
+static inline int svq3_get_ue_golomb(GetBitContext *gb){
+    uint32_t buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf&0xAA800000){ /* one of bits 31,29,27,25,23 is set: short code, use the tables */
+        buf >>= 32 - 8; /* the top 8 bits index the 256-entry tables */
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    }else{
+        LAST_SKIP_BITS(re, gb, 8); /* these 8 bits are accounted for again in the final skip below */
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8); /* merge the refreshed cache below the top byte and force bit 0 on */
+
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for(log=31; (buf & 0x80000000) == 0; log--){ /* one bit pair per iteration, until the top bit is 1 */
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); /* unread part moves up by 2; bit 30 is appended to the value kept in the low 32-log bits */
+        }
+
+        LAST_SKIP_BITS(re, gb, 63 - 2*log - 8); /* 63 - 2*log bits in total, 8 of them already skipped */
+        CLOSE_READER(re, gb);
+
+        return ((buf << log) >> log) - 1; /* low 32-log bits, minus one */
+    }
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ * range == 1: nothing is read and 0 is returned.
+ * range == 2: a single bit is read and returned inverted.
+ * otherwise:  a regular unsigned exp golomb code is read (get_ue_golomb()).
+ * @param range asserted to be >= 1
+ */
+static inline int get_te0_golomb(GetBitContext *gb, int range){
+    assert(range >= 1);
+
+    if(range==1)      return 0;
+    else if(range==2) return get_bits1(gb)^1;
+    else              return get_ue_golomb(gb);
+}
+
+/**
+ * read unsigned truncated exp golomb code.
+ * Same as get_te0_golomb() except that range == 1 has no shortcut here:
+ * range == 2 reads a single inverted bit, every other range reads a regular
+ * unsigned exp golomb code.
+ * @param range asserted to be >= 1
+ */
+static inline int get_te_golomb(GetBitContext *gb, int range){
+    assert(range >= 1);
+
+    if(range==2) return get_bits1(gb)^1;
+    else         return get_ue_golomb(gb);
+}
+
+
+/**
+ * read signed exp golomb code.
+ */
+static inline int get_se_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf >= (1<<27)){ /* some bit among the top 5 cache bits is set: use the lookup tables */
+        buf >>= 32 - 9; /* the top 9 bits index the 512-entry tables */
+        LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_se_golomb_vlc_code[buf];
+    }else{ /* top 5 cache bits are all zero: decode arithmetically */
+        log= 2*av_log2(buf) - 31;
+        buf>>= log;
+
+        LAST_SKIP_BITS(re, gb, 32 - log);
+        CLOSE_READER(re, gb);
+
+        /* the lowest bit selects the sign, the remaining bits are the magnitude */
+        if(buf&1) buf= -(buf>>1);
+        else      buf=  (buf>>1);
+
+        return buf;
+    }
+}
+
+/**
+ * read signed exp golomb code stored in the bit-interleaved layout that the
+ * ff_interleaved_* tables describe.
+ * @return the decoded value, or INVALID_VLC when, after the refill, none of
+ *         the bits selected by the mask 0xAAAAAAAA is set. Note that the
+ *         INVALID_VLC path returns without CLOSE_READER.
+ */
+static inline int svq3_get_se_golomb(GetBitContext *gb){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    if(buf&0xAA800000){ /* one of bits 31,29,27,25,23 is set: short code, use the tables */
+        buf >>= 32 - 8; /* the top 8 bits index the 256-entry tables */
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    }else{
+        LAST_SKIP_BITS(re, gb, 8); /* these 8 bits are accounted for again in the final skip below */
+        UPDATE_CACHE(re, gb);
+        buf |= 1 | (GET_CACHE(re, gb) >> 8);
+
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
+
+        for(log=31; (buf & 0x80000000) == 0; log--){ /* one bit pair per iteration, until the top bit is 1 */
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+        }
+
+        LAST_SKIP_BITS(re, gb, 63 - 2*log - 8); /* 63 - 2*log bits in total, 8 of them already skipped */
+        CLOSE_READER(re, gb);
+
+        /* low 32-log bits minus one give the unsigned code; bit 0 of buf selects the sign */
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
+}
+
+/**
+ * read unsigned golomb rice code (ffv1).
+ * @param k       number of remainder bits that follow the unary prefix
+ * @param limit   prefix length (in zero bits) from which the escape form is used
+ * @param esc_len number of value bits read in the escape form
+ * @return prefix_zeros<<k plus the k remainder bits in the regular form,
+ *         or the esc_len-bit escape value + limit - 1 in the escape form
+ */
+static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, int esc_len){
+    unsigned int buf;
+    int log;
+
+    OPEN_READER(re, gb);
+    UPDATE_CACHE(re, gb);
+    buf=GET_CACHE(re, gb);
+
+    log= av_log2(buf);
+
+    if(log > 31-limit){ /* fewer than limit leading zero bits: regular code */
+        buf >>= log - k; /* keep the terminating 1 bit and the k remainder bits */
+        buf += (30-log)<<k; /* together with the 1 bit this yields (31-log)<<k, i.e. the zero count as quotient */
+        LAST_SKIP_BITS(re, gb, 32 + k - log); /* zeros + terminating bit + k remainder bits */
+        CLOSE_READER(re, gb);
+
+        return buf;
+    }else{ /* at least limit leading zero bits: escape form */
+        buf >>= 32 - limit - esc_len; /* the limit zero bits contribute nothing, leaving the esc_len-bit value */
+        LAST_SKIP_BITS(re, gb, esc_len + limit);
+        CLOSE_READER(re, gb);
+
+        return buf + limit - 1;
+    }
+}
+
+/**
+ * read unsigned golomb rice code (jpegls).
+ */ +static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int esc_len){ + unsigned int buf; + int log; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf=GET_CACHE(re, gb); + + log= av_log2(buf); + + if(log > 31-11){ + buf >>= log - k; + buf += (30-log)<>1; + else return -(v>>1); + +// return (v>>1) ^ -(v&1); +} + +/** + * read signed golomb rice code (flac). + */ +static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int esc_len){ + int v= get_ur_golomb_jpegls(gb, k, limit, esc_len); + return (v>>1) ^ -(v&1); +} + +/** + * read unsigned golomb rice code (shorten). + */ +static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){ + return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); +} + +/** + * read signed golomb rice code (shorten). + */ +static inline int get_sr_golomb_shorten(GetBitContext* gb, int k) +{ + int uvar = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0); + if (uvar & 1) + return ~(uvar >> 1); + else + return uvar >> 1; +} + + + +#ifdef TRACE + +static inline int get_ue(GetBitContext *s, char *file, const char *func, int line){ + int show= show_bits(s, 24); + int pos= get_bits_count(s); + int i= get_ue_golomb(s); + int len= get_bits_count(s) - pos; + int bits= show>>(24-len); + + print_bin(bits, len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + + return i; +} + +static inline int get_se(GetBitContext *s, char *file, const char *func, int line){ + int show= show_bits(s, 24); + int pos= get_bits_count(s); + int i= get_se_golomb(s); + int len= get_bits_count(s) - pos; + int bits= show>>(24-len); + + print_bin(bits, len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + + return i; +} + +static inline int get_te(GetBitContext *s, int r, char *file, const char *func, int line){ + int show= show_bits(s, 24); + int pos= get_bits_count(s); + int i= get_te0_golomb(s, r); + int len= 
get_bits_count(s) - pos; + int bits= show>>(24-len); + + print_bin(bits, len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + + return i; +} + +#define get_ue_golomb(a) get_ue(a, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_se_golomb(a) get_se(a, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_te_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_te0_golomb(a, r) get_te(a, r, __FILE__, __PRETTY_FUNCTION__, __LINE__) + +#endif + +/** + * write unsigned exp golomb code. + */ +static inline void set_ue_golomb(PutBitContext *pb, int i){ + int e; + + assert(i>=0); + +#if 0 + if(i=0){ + put_bits(pb, 1, 1); + return; + } +#endif + if(i<256) + put_bits(pb, ff_ue_golomb_len[i], i+1); + else{ + e= av_log2(i+1); + + put_bits(pb, 2*e+1, i+1); + } +} + +/** + * write truncated unsigned exp golomb code. + */ +static inline void set_te_golomb(PutBitContext *pb, int i, int range){ + assert(range >= 1); + assert(i<=range); + + if(range==2) put_bits(pb, 1, i^1); + else set_ue_golomb(pb, i); +} + +/** + * write signed exp golomb code. 16 bits at most. + */ +static inline void set_se_golomb(PutBitContext *pb, int i){ +// if (i>32767 || i<-32767) +// av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i); +#if 0 + if(i<=0) i= -2*i; + else i= 2*i-1; +#elif 1 + i= 2*i-1; + if(i<0) i^= -1; //FIXME check if gcc does the right thing +#else + i= 2*i-1; + i^= (i>>31); +#endif + set_ue_golomb(pb, i); +} + +/** + * write unsigned golomb rice code (ffv1). + */ +static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit, int esc_len){ + int e; + + assert(i>=0); + + e= i>>k; + if(e=0); + + e= (i>>k) + 1; + if(e>31); + + set_ur_golomb(pb, v, k, limit, esc_len); +} + +/** + * write signed golomb rice code (flac). 
+ */
+static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k, int limit, int esc_len){
+    int v;
+
+    v = -2*i-1; /* negative when i >= 0, positive when i < 0 */
+    v ^= (v>>31); /* i >= 0 becomes 2*i, i < 0 stays -2*i-1; relies on >> of a negative int replicating the sign bit */
+
+    set_ur_golomb_jpegls(pb, v, k, limit, esc_len); /* the folded, non-negative value is written as an unsigned code */
+}
diff --git a/mpeg4/src/libavcodec/h261.c b/mpeg4/src/libavcodec/h261.c
new file mode 100644
index 0000000000000000000000000000000000000000..96b5af1f6460797a7ead5f9092c20847a8e60238
--- /dev/null
+++ b/mpeg4/src/libavcodec/h261.c
@@ -0,0 +1,1047 @@
+/*
+ * H261 decoder
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ * Copyright (c) 2004 Maarten Daniels
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h261.c
+ * h261codec.
+ */ + +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" +#include "h261data.h" + + +#define H261_MBA_VLC_BITS 9 +#define H261_MTYPE_VLC_BITS 6 +#define H261_MV_VLC_BITS 7 +#define H261_CBP_VLC_BITS 9 +#define TCOEFF_VLC_BITS 9 + +#define MBA_STUFFING 33 +#define MBA_STARTCODE 34 +#define IS_FIL(a) ((a)&MB_TYPE_H261_FIL) + +/** + * H261Context + */ +typedef struct H261Context{ + MpegEncContext s; + + int current_mba; + int previous_mba; + int mba_diff; + int mtype; + int current_mv_x; + int current_mv_y; + int gob_number; + int gob_start_code_skipped; // 1 if gob start code is already read before gob header is read +}H261Context; + +void ff_h261_loop_filter(MpegEncContext *s){ + H261Context * h= (H261Context*)s; + const int linesize = s->linesize; + const int uvlinesize= s->uvlinesize; + uint8_t *dest_y = s->dest[0]; + uint8_t *dest_cb= s->dest[1]; + uint8_t *dest_cr= s->dest[2]; + + if(!(IS_FIL (h->mtype))) + return; + + s->dsp.h261_loop_filter(dest_y , linesize); + s->dsp.h261_loop_filter(dest_y + 8, linesize); + s->dsp.h261_loop_filter(dest_y + 8 * linesize , linesize); + s->dsp.h261_loop_filter(dest_y + 8 * linesize + 8, linesize); + s->dsp.h261_loop_filter(dest_cb, uvlinesize); + s->dsp.h261_loop_filter(dest_cr, uvlinesize); +} + +static int ff_h261_get_picture_format(int width, int height){ + // QCIF + if (width == 176 && height == 144) + return 0; + // CIF + else if (width == 352 && height == 288) + return 1; + // ERROR + else + return -1; +} + +static void h261_encode_block(H261Context * h, DCTELEM * block, + int n); +static int h261_decode_block(H261Context *h, DCTELEM *block, + int n, int coded); + +void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number){ + H261Context * h = (H261Context *) s; + int format, temp_ref; + + align_put_bits(&s->pb); + + /* Update the pointer to last GOB */ + s->ptr_lastgob = pbBufPtr(&s->pb); + + put_bits(&s->pb, 20, 0x10); /* PSC */ + + temp_ref= s->picture_number * 
(int64_t)30000 * s->avctx->time_base.num / + (1001 * (int64_t)s->avctx->time_base.den); //FIXME maybe this should use a timestamp + put_bits(&s->pb, 5, temp_ref & 0x1f); /* TemporalReference */ + + put_bits(&s->pb, 1, 0); /* split screen off */ + put_bits(&s->pb, 1, 0); /* camera off */ + put_bits(&s->pb, 1, 0); /* freeze picture release off */ + + format = ff_h261_get_picture_format(s->width, s->height); + + put_bits(&s->pb, 1, format); /* 0 == QCIF, 1 == CIF */ + + put_bits(&s->pb, 1, 0); /* still image mode */ + put_bits(&s->pb, 1, 0); /* reserved */ + + put_bits(&s->pb, 1, 0); /* no PEI */ + if(format == 0) + h->gob_number = -1; + else + h->gob_number = 0; + h->current_mba = 0; +} + +/** + * Encodes a group of blocks header. + */ +static void h261_encode_gob_header(MpegEncContext * s, int mb_line){ + H261Context * h = (H261Context *)s; + if(ff_h261_get_picture_format(s->width, s->height) == 0){ + h->gob_number+=2; // QCIF + } + else{ + h->gob_number++; // CIF + } + put_bits(&s->pb, 16, 1); /* GBSC */ + put_bits(&s->pb, 4, h->gob_number); /* GN */ + put_bits(&s->pb, 5, s->qscale); /* GQUANT */ + put_bits(&s->pb, 1, 0); /* no GEI */ + h->current_mba = 0; + h->previous_mba = 0; + h->current_mv_x=0; + h->current_mv_y=0; +} + +void ff_h261_reorder_mb_index(MpegEncContext* s){ + int index= s->mb_x + s->mb_y*s->mb_width; + + if(index % 33 == 0) + h261_encode_gob_header(s,0); + + /* for CIF the GOB's are fragmented in the middle of a scanline + that's why we need to adjust the x and y index of the macroblocks */ + if(ff_h261_get_picture_format(s->width,s->height) == 1){ // CIF + s->mb_x = index % 11 ; index /= 11; + s->mb_y = index % 3 ; index /= 3; + s->mb_x+= 11*(index % 2); index /= 2; + s->mb_y+= 3*index; + + ff_init_block_index(s); + ff_update_block_index(s); + } +} + +static void h261_encode_motion(H261Context * h, int val){ + MpegEncContext * const s = &h->s; + int sign, code; + if(val==0){ + code = 0; + 
put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]); + } + else{ + if(val > 15) + val -=32; + if(val < -16) + val+=32; + sign = val < 0; + code = sign ? -val : val; + put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]); + put_bits(&s->pb,1,sign); + } +} + +static inline int get_cbp(MpegEncContext * s, + DCTELEM block[6][64]) +{ + int i, cbp; + cbp= 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + return cbp; +} +void ff_h261_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + H261Context * h = (H261Context *)s; + int mvd, mv_diff_x, mv_diff_y, i, cbp; + cbp = 63; // avoid warning + mvd = 0; + + h->current_mba++; + h->mtype = 0; + + if (!s->mb_intra){ + /* compute cbp */ + cbp= get_cbp(s, block); + + /* mvd indicates if this block is motion compensated */ + mvd = motion_x | motion_y; + + if((cbp | mvd | s->dquant ) == 0) { + /* skip macroblock */ + s->skip_count++; + h->current_mv_x=0; + h->current_mv_y=0; + return; + } + } + + /* MB is not skipped, encode MBA */ + put_bits(&s->pb, h261_mba_bits[(h->current_mba-h->previous_mba)-1], h261_mba_code[(h->current_mba-h->previous_mba)-1]); + + /* calculate MTYPE */ + if(!s->mb_intra){ + h->mtype++; + + if(mvd || s->loop_filter) + h->mtype+=3; + if(s->loop_filter) + h->mtype+=3; + if(cbp || s->dquant) + h->mtype++; + assert(h->mtype > 1); + } + + if(s->dquant) + h->mtype++; + + put_bits(&s->pb, h261_mtype_bits[h->mtype], h261_mtype_code[h->mtype]); + + h->mtype = h261_mtype_map[h->mtype]; + + if(IS_QUANT(h->mtype)){ + ff_set_qscale(s,s->qscale+s->dquant); + put_bits(&s->pb, 5, s->qscale); + } + + if(IS_16X16(h->mtype)){ + mv_diff_x = (motion_x >> 1) - h->current_mv_x; + mv_diff_y = (motion_y >> 1) - h->current_mv_y; + h->current_mv_x = (motion_x >> 1); + h->current_mv_y = (motion_y >> 1); + h261_encode_motion(h,mv_diff_x); + h261_encode_motion(h,mv_diff_y); + } + + h->previous_mba = h->current_mba; + + if(HAS_CBP(h->mtype)){ + 
put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]); + } + for(i=0; i<6; i++) { + /* encode each block */ + h261_encode_block(h, block[i], i); + } + + if ( ( h->current_mba == 11 ) || ( h->current_mba == 22 ) || ( h->current_mba == 33 ) || ( !IS_16X16 ( h->mtype ) )){ + h->current_mv_x=0; + h->current_mv_y=0; + } +} + +void ff_h261_encode_init(MpegEncContext *s){ + static int done = 0; + + if (!done) { + done = 1; + init_rl(&h261_rl_tcoeff, 1); + } + + s->min_qcoeff= -127; + s->max_qcoeff= 127; + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; +} + + +/** + * encodes a 8x8 block. + * @param block the 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static void h261_encode_block(H261Context * h, DCTELEM * block, int n){ + MpegEncContext * const s = &h->s; + int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code; + RLTable *rl; + + rl = &h261_rl_tcoeff; + if (s->mb_intra) { + /* DC coef */ + level = block[0]; + /* 255 cannot be represented, so we clamp */ + if (level > 254) { + level = 254; + block[0] = 254; + } + /* 0 cannot be represented also */ + else if (level < 1) { + level = 1; + block[0] = 1; + } + if (level == 128) + put_bits(&s->pb, 8, 0xff); + else + put_bits(&s->pb, 8, level); + i = 1; + } else if((block[0]==1 || block[0] == -1) && (s->block_last_index[n] > -1)){ + //special case + put_bits(&s->pb,2,block[0]>0 ? 
2 : 3 ); + i = 1; + } else { + i = 0; + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, 0 /*no last in H.261, EOB is used*/, run, level); + if(run==0 && level < 16) + code+=1; + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + put_bits(&s->pb, 6, run); + assert(slevel != 0); + assert(level <= 127); + put_bits(&s->pb, 8, slevel & 0xff); + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } + if(last_index > -1){ + put_bits(&s->pb, rl->table_vlc[0][1], rl->table_vlc[0][0]);// END OF BLOCK + } +} + +/***********************************************/ +/* decoding */ + +static VLC h261_mba_vlc; +static VLC h261_mtype_vlc; +static VLC h261_mv_vlc; +static VLC h261_cbp_vlc; + +void init_vlc_rl(RLTable *rl, int use_static); + +static void h261_decode_init_vlc(H261Context *h){ + static int done = 0; + + if(!done){ + done = 1; + init_vlc(&h261_mba_vlc, H261_MBA_VLC_BITS, 35, + h261_mba_bits, 1, 1, + h261_mba_code, 1, 1, 1); + init_vlc(&h261_mtype_vlc, H261_MTYPE_VLC_BITS, 10, + h261_mtype_bits, 1, 1, + h261_mtype_code, 1, 1, 1); + init_vlc(&h261_mv_vlc, H261_MV_VLC_BITS, 17, + &h261_mv_tab[0][1], 2, 1, + &h261_mv_tab[0][0], 2, 1, 1); + init_vlc(&h261_cbp_vlc, H261_CBP_VLC_BITS, 63, + &h261_cbp_tab[0][1], 2, 1, + &h261_cbp_tab[0][0], 2, 1, 1); + init_rl(&h261_rl_tcoeff, 1); + init_vlc_rl(&h261_rl_tcoeff, 1); + } +} + +static int h261_decode_init(AVCodecContext *avctx){ + H261Context *h= avctx->priv_data; + MpegEncContext * const s = &h->s; + + // set defaults + MPV_decode_defaults(s); + s->avctx = avctx; + + s->width = s->avctx->coded_width; + s->height = s->avctx->coded_height; + s->codec_id = 
s->avctx->codec->id; + + s->out_format = FMT_H261; + s->low_delay= 1; + avctx->pix_fmt= PIX_FMT_YUV420P; + + s->codec_id= avctx->codec->id; + + h261_decode_init_vlc(h); + + h->gob_start_code_skipped = 0; + + return 0; +} + +/** + * decodes the group of blocks header or slice header. + * @return <0 if an error occured + */ +static int h261_decode_gob_header(H261Context *h){ + unsigned int val; + MpegEncContext * const s = &h->s; + + if ( !h->gob_start_code_skipped ){ + /* Check for GOB Start Code */ + val = show_bits(&s->gb, 15); + if(val) + return -1; + + /* We have a GBSC */ + skip_bits(&s->gb, 16); + } + + h->gob_start_code_skipped = 0; + + h->gob_number = get_bits(&s->gb, 4); /* GN */ + s->qscale = get_bits(&s->gb, 5); /* GQUANT */ + + /* Check if gob_number is valid */ + if (s->mb_height==18){ //cif + if ((h->gob_number<=0) || (h->gob_number>12)) + return -1; + } + else{ //qcif + if ((h->gob_number!=1) && (h->gob_number!=3) && (h->gob_number!=5)) + return -1; + } + + /* GEI */ + while (get_bits1(&s->gb) != 0) { + skip_bits(&s->gb, 8); + } + + if(s->qscale==0) + return -1; + + // For the first transmitted macroblock in a GOB, MBA is the absolute address. For + // subsequent macroblocks, MBA is the difference between the absolute addresses of + // the macroblock and the last transmitted macroblock. + h->current_mba = 0; + h->mba_diff = 0; + + return 0; +} + +/** + * decodes the group of blocks / video packet header. + * @return <0 if no resync found + */ +static int ff_h261_resync(H261Context *h){ + MpegEncContext * const s = &h->s; + int left, ret; + + if ( h->gob_start_code_skipped ){ + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + } + else{ + if(show_bits(&s->gb, 15)==0){ + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + } + //ok, its not where its supposed to be ... 
+ s->gb= s->last_resync_gb; + align_get_bits(&s->gb); + left= s->gb.size_in_bits - get_bits_count(&s->gb); + + for(;left>15+1+4+5; left-=8){ + if(show_bits(&s->gb, 15)==0){ + GetBitContext bak= s->gb; + + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + + s->gb= bak; + } + skip_bits(&s->gb, 8); + } + } + + return -1; +} + +/** + * decodes skipped macroblocks + * @return 0 + */ +static int h261_decode_mb_skipped(H261Context *h, int mba1, int mba2 ) +{ + MpegEncContext * const s = &h->s; + int i; + + s->mb_intra = 0; + + for(i=mba1; imb_x= ((h->gob_number-1) % 2) * 11 + i % 11; + s->mb_y= ((h->gob_number-1) / 2) * 3 + i / 11; + xy = s->mb_x + s->mb_y * s->mb_stride; + ff_init_block_index(s); + ff_update_block_index(s); + + for(j=0;j<6;j++) + s->block_last_index[j] = -1; + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skipped = 1; + h->mtype &= ~MB_TYPE_H261_FIL; + + MPV_decode_mb(s, s->block); + } + + return 0; +} + +static int decode_mv_component(GetBitContext *gb, int v){ + int mv_diff = get_vlc2(gb, h261_mv_vlc.table, H261_MV_VLC_BITS, 2); + + /* check if mv_diff is valid */ + if ( mv_diff < 0 ) + return v; + + mv_diff = mvmap[mv_diff]; + + if(mv_diff && !get_bits1(gb)) + mv_diff= -mv_diff; + + v += mv_diff; + if (v <=-16) v+= 32; + else if(v >= 16) v-= 32; + + return v; +} + +static int h261_decode_mb(H261Context *h){ + MpegEncContext * const s = &h->s; + int i, cbp, xy; + + cbp = 63; + // Read mba + do{ + h->mba_diff = get_vlc2(&s->gb, h261_mba_vlc.table, H261_MBA_VLC_BITS, 2); + + /* Check for slice end */ + /* NOTE: GOB can be empty (no MB data) or exist only of MBA_stuffing */ + if (h->mba_diff == MBA_STARTCODE){ // start code + h->gob_start_code_skipped = 1; + return SLICE_END; + } + } + while( h->mba_diff == MBA_STUFFING ); // stuffing + + if ( h->mba_diff < 0 ){ + if ( get_bits_count(&s->gb) + 7 >= 
s->gb.size_in_bits ) + return SLICE_END; + + av_log(s->avctx, AV_LOG_ERROR, "illegal mba at %d %d\n", s->mb_x, s->mb_y); + return SLICE_ERROR; + } + + h->mba_diff += 1; + h->current_mba += h->mba_diff; + + if ( h->current_mba > MBA_STUFFING ) + return SLICE_ERROR; + + s->mb_x= ((h->gob_number-1) % 2) * 11 + ((h->current_mba-1) % 11); + s->mb_y= ((h->gob_number-1) / 2) * 3 + ((h->current_mba-1) / 11); + xy = s->mb_x + s->mb_y * s->mb_stride; + ff_init_block_index(s); + ff_update_block_index(s); + + // Read mtype + h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2); + h->mtype = h261_mtype_map[h->mtype]; + + // Read mquant + if ( IS_QUANT ( h->mtype ) ){ + ff_set_qscale(s, get_bits(&s->gb, 5)); + } + + s->mb_intra = IS_INTRA4x4(h->mtype); + + // Read mv + if ( IS_16X16 ( h->mtype ) ){ + // Motion vector data is included for all MC macroblocks. MVD is obtained from the macroblock vector by subtracting the + // vector of the preceding macroblock. For this calculation the vector of the preceding macroblock is regarded as zero in the + // following three situations: + // 1) evaluating MVD for macroblocks 1, 12 and 23; + // 2) evaluating MVD for macroblocks in which MBA does not represent a difference of 1; + // 3) MTYPE of the previous macroblock was not MC. 
+ if ( ( h->current_mba == 1 ) || ( h->current_mba == 12 ) || ( h->current_mba == 23 ) || + ( h->mba_diff != 1)) + { + h->current_mv_x = 0; + h->current_mv_y = 0; + } + + h->current_mv_x= decode_mv_component(&s->gb, h->current_mv_x); + h->current_mv_y= decode_mv_component(&s->gb, h->current_mv_y); + }else{ + h->current_mv_x = 0; + h->current_mv_y = 0; + } + + // Read cbp + if ( HAS_CBP( h->mtype ) ){ + cbp = get_vlc2(&s->gb, h261_cbp_vlc.table, H261_CBP_VLC_BITS, 2) + 1; + } + + if(s->mb_intra){ + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + goto intra; + } + + //set motion vectors + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + s->mv[0][0][0] = h->current_mv_x * 2;//gets divided by 2 in motion compensation + s->mv[0][0][1] = h->current_mv_y * 2; + +intra: + /* decode each block */ + if(s->mb_intra || HAS_CBP(h->mtype)){ + s->dsp.clear_blocks(s->block[0]); + for (i = 0; i < 6; i++) { + if (h261_decode_block(h, s->block[i], i, cbp&32) < 0){ + return SLICE_ERROR; + } + cbp+=cbp; + } + }else{ + for (i = 0; i < 6; i++) + s->block_last_index[i]= -1; + } + + MPV_decode_mb(s, s->block); + + return SLICE_OK; +} + +/** + * decodes a macroblock + * @return <0 if an error occured + */ +static int h261_decode_block(H261Context * h, DCTELEM * block, + int n, int coded) +{ + MpegEncContext * const s = &h->s; + int code, level, i, j, run; + RLTable *rl = &h261_rl_tcoeff; + const uint8_t *scan_table; + + // For the variable length encoding there are two code tables, one being used for + // the first transmitted LEVEL in INTER, INTER+MC and INTER+MC+FIL blocks, the second + // for all other LEVELs except the first one in INTRA blocks which is fixed length + // coded with 8 bits. + // NOTE: the two code tables only differ in one VLC so we handle that manually. 
+ scan_table = s->intra_scantable.permutated; + if (s->mb_intra){ + /* DC coef */ + level = get_bits(&s->gb, 8); + // 0 (00000000b) and -128 (10000000b) are FORBIDDEN + if((level&0x7F) == 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y); + return -1; + } + // The code 1000 0000 is not used, the reconstruction level of 1024 being coded as 1111 1111. + if (level == 255) + level = 128; + block[0] = level; + i = 1; + }else if(coded){ + // Run Level Code + // EOB Not possible for first level when cbp is available (that's why the table is different) + // 0 1 1s + // * * 0* + int check = show_bits(&s->gb, 2); + i = 0; + if ( check & 0x2 ){ + skip_bits(&s->gb, 2); + block[0] = ( check & 0x1 ) ? -1 : 1; + i = 1; + } + }else{ + i = 0; + } + if(!coded){ + s->block_last_index[n] = i - 1; + return 0; + } + for(;;){ + code = get_vlc2(&s->gb, rl->vlc.table, TCOEFF_VLC_BITS, 2); + if (code < 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + if (code == rl->n) { + /* escape */ + // The remaining combinations of (run, level) are encoded with a 20-bit word consisting of 6 bits escape, 6 bits run and 8 bits level. + run = get_bits(&s->gb, 6); + level = get_sbits(&s->gb, 8); + }else if(code == 0){ + break; + }else{ + run = rl->table_run[code]; + level = rl->table_level[code]; + if (get_bits1(&s->gb)) + level = -level; + } + i += run; + if (i >= 64){ + av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + j = scan_table[i]; + block[j] = level; + i++; + } + s->block_last_index[n] = i-1; + return 0; +} + +/** + * decodes the H261 picture header. 
+ * @return <0 if no startcode found + */ +static int h261_decode_picture_header(H261Context *h){ + MpegEncContext * const s = &h->s; + int format, i; + uint32_t startcode= 0; + + for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=1){ + startcode = ((startcode << 1) | get_bits(&s->gb, 1)) & 0x000FFFFF; + + if(startcode == 0x10) + break; + } + + if (startcode != 0x10){ + av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n"); + return -1; + } + + /* temporal reference */ + s->picture_number = get_bits(&s->gb, 5); /* picture timestamp */ + + /* PTYPE starts here */ + skip_bits1(&s->gb); /* split screen off */ + skip_bits1(&s->gb); /* camera off */ + skip_bits1(&s->gb); /* freeze picture release off */ + + format = get_bits1(&s->gb); + + //only 2 formats possible + if (format == 0){//QCIF + s->width = 176; + s->height = 144; + s->mb_width = 11; + s->mb_height = 9; + }else{//CIF + s->width = 352; + s->height = 288; + s->mb_width = 22; + s->mb_height = 18; + } + + s->mb_num = s->mb_width * s->mb_height; + + skip_bits1(&s->gb); /* still image mode off */ + skip_bits1(&s->gb); /* Reserved */ + + /* PEI */ + while (get_bits1(&s->gb) != 0){ + skip_bits(&s->gb, 8); + } + + // h261 has no I-FRAMES, but if we pass I_TYPE for the first frame, the codec crashes if it does + // not contain all I-blocks (e.g. 
when a packet is lost) + s->pict_type = P_TYPE; + + h->gob_number = 0; + return 0; +} + +static int h261_decode_gob(H261Context *h){ + MpegEncContext * const s = &h->s; + + ff_set_qscale(s, s->qscale); + + /* decode mb's */ + while(h->current_mba <= MBA_STUFFING) + { + int ret; + /* DCT & quantize */ + ret= h261_decode_mb(h); + if(ret<0){ + if(ret==SLICE_END){ + h261_decode_mb_skipped(h, h->current_mba, 33); + return 0; + } + av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", s->mb_x + s->mb_y*s->mb_stride); + return -1; + } + + h261_decode_mb_skipped(h, h->current_mba-h->mba_diff, h->current_mba-1); + } + + return -1; +} + +static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const uint8_t *buf, int buf_size){ + int vop_found, i, j; + uint32_t state; + + vop_found= pc->frame_start_found; + state= pc->state; + + for(i=0; i>j)&0xFFFFF) == 0x00010){ + i++; + vop_found=1; + break; + } + } + } + if(vop_found){ + for(; i>j)&0xFFFFF) == 0x00010){ + pc->frame_start_found=0; + pc->state= state>>(2*8); + return i-1; + } + } + } + } + + pc->frame_start_found= vop_found; + pc->state= state; + return END_NOT_FOUND; +} + +static int h261_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + ParseContext *pc = s->priv_data; + int next; + + next= h261_find_frame_end(pc,avctx, buf, buf_size); + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} + +/** + * returns the number of bytes consumed for building the current frame + */ +static int get_consumed_bytes(MpegEncContext *s, int buf_size){ + int pos= get_bits_count(&s->gb)>>3; + if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...) 
+ if(pos+10>buf_size) pos=buf_size; // oops ;) + + return pos; +} + +static int h261_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + H261Context *h= avctx->priv_data; + MpegEncContext *s = &h->s; + int ret; + AVFrame *pict = data; + +#ifdef DEBUG + av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size); + av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); +#endif + s->flags= avctx->flags; + s->flags2= avctx->flags2; + + h->gob_start_code_skipped=0; + +retry: + + init_get_bits(&s->gb, buf, buf_size*8); + + if(!s->context_initialized){ + if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix + return -1; + } + + //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there + if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){ + int i= ff_find_unused_picture(s, 0); + s->current_picture_ptr= &s->picture[i]; + } + + ret = h261_decode_picture_header(h); + + /* skip if the header was thrashed */ + if (ret < 0){ + av_log(s->avctx, AV_LOG_ERROR, "header damaged\n"); + return -1; + } + + if (s->width != avctx->coded_width || s->height != avctx->coded_height){ + ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat + s->parse_context.buffer=0; + MPV_common_end(s); + s->parse_context= pc; + } + if (!s->context_initialized) { + avcodec_set_dimensions(avctx, s->width, s->height); + + goto retry; + } + + // for hurry_up==5 + s->current_picture.pict_type= s->pict_type; + s->current_picture.key_frame= s->pict_type == I_TYPE; + + /* skip everything if we are in a hurry>=5 */ + if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size); + if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE) + ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE) + || avctx->skip_frame >= AVDISCARD_ALL) + return get_consumed_bytes(s, 
buf_size); + + if(MPV_frame_start(s, avctx) < 0) + return -1; + + ff_er_frame_start(s); + + /* decode each macroblock */ + s->mb_x=0; + s->mb_y=0; + + while(h->gob_number < (s->mb_height==18 ? 12 : 5)){ + if(ff_h261_resync(h)<0) + break; + h261_decode_gob(h); + } + MPV_frame_end(s); + +assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type); +assert(s->current_picture.pict_type == s->pict_type); + *pict= *(AVFrame*)s->current_picture_ptr; + ff_print_debug_info(s, pict); + + /* Return the Picture timestamp as the frame number */ + /* we substract 1 because it is added on utils.c */ + avctx->frame_number = s->picture_number - 1; + + *data_size = sizeof(AVFrame); + + return get_consumed_bytes(s, buf_size); +} + +static int h261_decode_end(AVCodecContext *avctx) +{ + H261Context *h= avctx->priv_data; + MpegEncContext *s = &h->s; + + MPV_common_end(s); + return 0; +} + +#ifdef CONFIG_ENCODERS +AVCodec h261_encoder = { + "h261", + CODEC_TYPE_VIDEO, + CODEC_ID_H261, + sizeof(H261Context), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; +#endif + +AVCodec h261_decoder = { + "h261", + CODEC_TYPE_VIDEO, + CODEC_ID_H261, + sizeof(H261Context), + h261_decode_init, + NULL, + h261_decode_end, + h261_decode_frame, + CODEC_CAP_DR1, +}; + +AVCodecParser h261_parser = { + { CODEC_ID_H261 }, + sizeof(ParseContext), + NULL, + h261_parse, + ff_parse_close, +}; diff --git a/mpeg4/src/libavcodec/h261data.h b/mpeg4/src/libavcodec/h261data.h new file mode 100644 index 0000000000000000000000000000000000000000..9ea991b237e116d7bf22dd42c93881f5288ef348 --- /dev/null +++ b/mpeg4/src/libavcodec/h261data.h @@ -0,0 +1,136 @@ +/** + * @file h261data.h + * H.261 tables. 
+ */ +#define MB_TYPE_H261_FIL 0x800000 + +// H.261 VLC table for macroblock addressing +const uint8_t h261_mba_code[35] = { + 1, 3, 2, 3, + 2, 3, 2, 7, + 6, 11, 10, 9, + 8, 7, 6, 23, + 22, 21, 20, 19, + 18, 35, 34, 33, + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, + 15, //(MBA stuffing) + 1 //(start code) +}; + +const uint8_t h261_mba_bits[35] = { + 1, 3, 3, 4, + 4, 5, 5, 7, + 7, 8, 8, 8, + 8, 8, 8, 10, + 10, 10, 10, 10, + 10, 11, 11, 11, + 11, 11, 11, 11, + 11, 11, 11, 11, + 11, + 11, //(MBA stuffing) + 16 //(start code) +}; + +//H.261 VLC table for macroblock type +const uint8_t h261_mtype_code[10] = { + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1 +}; + +const uint8_t h261_mtype_bits[10] = { + 4, 7, 1, 5, + 9, 8, 10, 3, + 2, 6 +}; + +static const int h261_mtype_map[10]= { + MB_TYPE_INTRA4x4, + MB_TYPE_INTRA4x4 | MB_TYPE_QUANT, + MB_TYPE_CBP, + MB_TYPE_QUANT | MB_TYPE_CBP, + MB_TYPE_16x16, + MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_QUANT | MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_16x16 | MB_TYPE_H261_FIL, + MB_TYPE_CBP | MB_TYPE_16x16 | MB_TYPE_H261_FIL, + MB_TYPE_QUANT | MB_TYPE_CBP | MB_TYPE_16x16 | MB_TYPE_H261_FIL +}; + +//H.261 VLC table for motion vectors +const uint8_t h261_mv_tab[17][2] = { + {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7}, + {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, {12,10} +}; + +static const int mvmap[17] = +{ + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16 +}; + +//H.261 VLC table for coded block pattern +const uint8_t h261_cbp_tab[63][2] = +{ + {11,5}, {9,5}, {13,6}, {13,4}, {23,7}, {19,7}, {31,8}, {12,4}, + {22,7}, {18,7}, {30,8}, {19,5}, {27,8}, {23,8}, {19,8}, {11,4}, + {21,7}, {17,7}, {29,8}, {17,5}, {25,8}, {21,8}, {17,8}, {15,6}, + {15,8}, {13,8}, {3,9}, {15,5}, {11,8}, {7,8}, {7,9}, {10,4}, + {20,7}, {16,7}, {28,8}, {14,6}, {14,8}, {12,8}, {2,9}, {16,5}, + {24,8}, {20,8}, {16,8}, {14,5}, {10,8}, {6,8}, {6,9}, {18,5}, + {26,8}, {22,8}, {18,8}, {13,5}, {9,8}, {5,8}, {5,9}, {12,5}, + 
{8,8}, {4,8}, {4,9}, {7,3}, {10,5}, {8,5}, {12,6} +}; + +//H.261 VLC table for transform coefficients +const uint16_t h261_tcoeff_vlc[65][2] = { +{ 0x2, 2 }, { 0x3, 2 },{ 0x4, 4 },{ 0x5, 5 }, +{ 0x6, 7 },{ 0x26, 8 },{ 0x21, 8 },{ 0xa, 10 }, +{ 0x1d, 12 },{ 0x18, 12 },{ 0x13, 12 },{ 0x10 , 12 }, +{ 0x1a, 13},{ 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 }, +{ 0x3, 3 }, { 0x6, 6 }, { 0x25 , 8 }, { 0xc, 10 }, +{ 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x5, 4}, +{ 0x4, 7}, { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 }, +{ 0x7, 5 }, { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 }, +{ 0x6, 5 }, { 0xf, 10 }, { 0x12, 12}, { 0x7, 6}, +{ 0x9 , 10 }, { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 }, +{ 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12}, +{ 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 }, +{ 0x23, 8 }, { 0x22, 8 }, { 0x20, 8 }, { 0xe , 10 }, +{ 0xd, 10 }, { 0x8, 10 },{ 0x1f, 12 }, { 0x1a, 12 }, +{ 0x19, 12 }, { 0x17, 12 }, { 0x16, 12}, { 0x1f, 13}, +{ 0x1e, 13 }, { 0x1d, 13 }, { 0x1c, 13}, { 0x1b, 13}, +{ 0x1, 6 } //escape +}; + +const int8_t h261_tcoeff_level[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 1, + 2, 3, 4, 5, 1, 2, 3, 4, + 1, 2, 3, 1, 2, 3, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 +}; + +const int8_t h261_tcoeff_run[64] = { + 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 3, 3, 3, 3, 4, + 4, 4, 5, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26 +}; + +static RLTable h261_rl_tcoeff = { + 64, + 64, + h261_tcoeff_vlc, + h261_tcoeff_run, + h261_tcoeff_level, +}; diff --git a/mpeg4/src/libavcodec/h263.c b/mpeg4/src/libavcodec/h263.c new file mode 100644 index 0000000000000000000000000000000000000000..df05cbac451c9b8520cb75a8720e6fe70bd7b56d --- /dev/null +++ b/mpeg4/src/libavcodec/h263.c @@ -0,0 +1,6276 @@ +/* + * H263/MPEG4 backend for ffmpeg encoder and decoder + * Copyright (c) 2000,2001 
Fabrice Bellard. + * H263+ support. + * Copyright (c) 2001 Juan J. Sierralta P. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * ac prediction encoding, b-frame support, error resilience, optimizations, + * qpel decoding, gmc decoding, interlaced decoding, + * by Michael Niedermayer + */ + +/** + * @file h263.c + * h263/mpeg4 codec. 
+ */ + +//#define DEBUG +#include + +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" +#include "h263data.h" +#include "mpeg4data.h" + +//#undef NDEBUG +//#include + +#define INTRA_MCBPC_VLC_BITS 6 +#define INTER_MCBPC_VLC_BITS 7 +#define CBPY_VLC_BITS 6 +#define MV_VLC_BITS 9 +#define DC_VLC_BITS 9 +#define SPRITE_TRAJ_VLC_BITS 6 +#define MB_TYPE_B_VLC_BITS 4 +#define TEX_VLC_BITS 9 +#define H263_MBTYPE_B_VLC_BITS 6 +#define CBPC_B_VLC_BITS 3 + +#ifdef CONFIG_ENCODERS +static void h263_encode_block(MpegEncContext * s, DCTELEM * block, + int n); +static void h263p_encode_umotion(MpegEncContext * s, int val); +static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, + int n, int dc, uint8_t *scan_table, + PutBitContext *dc_pb, PutBitContext *ac_pb); +#endif + +static int h263_decode_motion(MpegEncContext * s, int pred, int fcode); +static int h263p_decode_umotion(MpegEncContext * s, int pred); +static int h263_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded); +static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); +static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded, int intra, int rvlc); +static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + uint8_t *scan_table); +static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr); +#ifdef CONFIG_ENCODERS +static void mpeg4_encode_visual_object_header(MpegEncContext * s); +static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number); +#endif //CONFIG_ENCODERS +static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb); +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding); + +#ifdef CONFIG_ENCODERS +static uint8_t uni_DCtab_lum_len[512]; +static uint8_t uni_DCtab_chrom_len[512]; +static uint16_t uni_DCtab_lum_bits[512]; +static uint16_t 
uni_DCtab_chrom_bits[512]; + +static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; +static uint8_t fcode_tab[MAX_MV*2+1]; +static uint8_t umv_fcode_tab[MAX_MV*2+1]; + +static uint32_t uni_mpeg4_intra_rl_bits[64*64*2*2]; +static uint8_t uni_mpeg4_intra_rl_len [64*64*2*2]; +static uint32_t uni_mpeg4_inter_rl_bits[64*64*2*2]; +static uint8_t uni_mpeg4_inter_rl_len [64*64*2*2]; +static uint8_t uni_h263_intra_aic_rl_len [64*64*2*2]; +static uint8_t uni_h263_inter_rl_len [64*64*2*2]; +//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level)) +//#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64) +#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level)) + +/* mpeg4 +inter +max level: 24/6 +max run: 53/63 + +intra +max level: 53/16 +max run: 29/41 +*/ +#endif + +#if 0 //3IV1 is quite rare and it slows things down a tiny bit +#define IS_3IV1 s->avctx->codec_tag == ff_get_fourcc("3IV1") +#else +#define IS_3IV1 0 +#endif + +int h263_get_picture_format(int width, int height) +{ + int format; + + if (width == 128 && height == 96) + format = 1; + else if (width == 176 && height == 144) + format = 2; + else if (width == 352 && height == 288) + format = 3; + else if (width == 704 && height == 576) + format = 4; + else if (width == 1408 && height == 1152) + format = 5; + else + format = 7; + return format; +} + +#ifdef CONFIG_ENCODERS + +static void aspect_to_info(MpegEncContext * s, AVRational aspect){ + int i; + + if(aspect.num==0) aspect= (AVRational){1,1}; + + for(i=1; i<6; i++){ + if(av_cmp_q(pixel_aspect[i], aspect) == 0){ + s->aspect_ratio_info=i; + return; + } + } + + s->aspect_ratio_info= FF_ASPECT_EXTENDED; +} + +void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number) +{ + int format; + + align_put_bits(&s->pb); + + put_bits(&s->pb, 17, 1); + put_bits(&s->pb, 5, (s->h263_flv-1)); /* 0: h263 escape codes 1: 11-bit escape codes */ + put_bits(&s->pb, 8, (((int64_t)s->picture_number * 30 
* s->avctx->time_base.num) / //FIXME use timestamp + s->avctx->time_base.den) & 0xff); /* TemporalReference */ + if (s->width == 352 && s->height == 288) + format = 2; + else if (s->width == 176 && s->height == 144) + format = 3; + else if (s->width == 128 && s->height == 96) + format = 4; + else if (s->width == 320 && s->height == 240) + format = 5; + else if (s->width == 160 && s->height == 120) + format = 6; + else if (s->width <= 255 && s->height <= 255) + format = 0; /* use 1 byte width & height */ + else + format = 1; /* use 2 bytes width & height */ + put_bits(&s->pb, 3, format); /* PictureSize */ + if (format == 0) { + put_bits(&s->pb, 8, s->width); + put_bits(&s->pb, 8, s->height); + } else if (format == 1) { + put_bits(&s->pb, 16, s->width); + put_bits(&s->pb, 16, s->height); + } + put_bits(&s->pb, 2, s->pict_type == P_TYPE); /* PictureType */ + put_bits(&s->pb, 1, 1); /* DeblockingFlag: on */ + put_bits(&s->pb, 5, s->qscale); /* Quantizer */ + put_bits(&s->pb, 1, 0); /* ExtraInformation */ + + if(s->h263_aic){ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_aic_dc_scale_table; + }else{ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + } +} + +void h263_encode_picture_header(MpegEncContext * s, int picture_number) +{ + int format, coded_frame_rate, coded_frame_rate_base, i, temp_ref; + int best_clock_code=1; + int best_divisor=60; + int best_error= INT_MAX; + + if(s->h263_plus){ + for(i=0; i<2; i++){ + int div, error; + div= (s->avctx->time_base.num*1800000LL + 500LL*s->avctx->time_base.den) / ((1000LL+i)*s->avctx->time_base.den); + div= clip(1, div, 127); + error= ABS(s->avctx->time_base.num*1800000LL - (1000LL+i)*s->avctx->time_base.den*div); + if(error < best_error){ + best_error= error; + best_divisor= div; + best_clock_code= i; + } + } + } + s->custom_pcf= best_clock_code!=1 || best_divisor!=60; + coded_frame_rate= 1800000; + coded_frame_rate_base= (1000+best_clock_code)*best_divisor; + + align_put_bits(&s->pb); + + /* Update 
the pointer to last GOB */ + s->ptr_lastgob = pbBufPtr(&s->pb); + put_bits(&s->pb, 22, 0x20); /* PSC */ + temp_ref= s->picture_number * (int64_t)coded_frame_rate * s->avctx->time_base.num / //FIXME use timestamp + (coded_frame_rate_base * (int64_t)s->avctx->time_base.den); + put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */ + + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 1, 0); /* h263 id */ + put_bits(&s->pb, 1, 0); /* split screen off */ + put_bits(&s->pb, 1, 0); /* camera off */ + put_bits(&s->pb, 1, 0); /* freeze picture release off */ + + format = h263_get_picture_format(s->width, s->height); + if (!s->h263_plus) { + /* H.263v1 */ + put_bits(&s->pb, 3, format); + put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); + /* By now UMV IS DISABLED ON H.263v1, since the restrictions + of H.263v1 UMV implies to check the predicted MV after + calculation of the current MB to see if we're on the limits */ + put_bits(&s->pb, 1, 0); /* Unrestricted Motion Vector: off */ + put_bits(&s->pb, 1, 0); /* SAC: off */ + put_bits(&s->pb, 1, s->obmc); /* Advanced Prediction */ + put_bits(&s->pb, 1, 0); /* only I/P frames, no PB frame */ + put_bits(&s->pb, 5, s->qscale); + put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ + } else { + int ufep=1; + /* H.263v2 */ + /* H.263 Plus PTYPE */ + + put_bits(&s->pb, 3, 7); + put_bits(&s->pb,3,ufep); /* Update Full Extended PTYPE */ + if (format == 7) + put_bits(&s->pb,3,6); /* Custom Source Format */ + else + put_bits(&s->pb, 3, format); + + put_bits(&s->pb,1, s->custom_pcf); + put_bits(&s->pb,1, s->umvplus); /* Unrestricted Motion Vector */ + put_bits(&s->pb,1,0); /* SAC: off */ + put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */ + put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */ + put_bits(&s->pb,1,s->loop_filter); /* Deblocking Filter */ + put_bits(&s->pb,1,s->h263_slice_structured); /* Slice Structured */ + put_bits(&s->pb,1,0); /* Reference Picture Selection: off */ + 
put_bits(&s->pb,1,0); /* Independent Segment Decoding: off */ + put_bits(&s->pb,1,s->alt_inter_vlc); /* Alternative Inter VLC */ + put_bits(&s->pb,1,s->modified_quant); /* Modified Quantization: */ + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + put_bits(&s->pb,3,0); /* Reserved */ + + put_bits(&s->pb, 3, s->pict_type == P_TYPE); + + put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */ + put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */ + put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */ + put_bits(&s->pb,2,0); /* Reserved */ + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + + /* This should be here if PLUSPTYPE */ + put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ + + if (format == 7) { + /* Custom Picture Format (CPFMT) */ + aspect_to_info(s, s->avctx->sample_aspect_ratio); + + put_bits(&s->pb,4,s->aspect_ratio_info); + put_bits(&s->pb,9,(s->width >> 2) - 1); + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + put_bits(&s->pb,9,(s->height >> 2)); + if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){ + put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num); + put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); + } + } + if(s->custom_pcf){ + if(ufep){ + put_bits(&s->pb, 1, best_clock_code); + put_bits(&s->pb, 7, best_divisor); + } + put_bits(&s->pb, 2, (temp_ref>>8)&3); + } + + /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ + if (s->umvplus) +// put_bits(&s->pb,1,1); /* Limited according tables of Annex D */ +//FIXME check actual requested range + put_bits(&s->pb,2,1); /* unlimited */ + if(s->h263_slice_structured) + put_bits(&s->pb,2,0); /* no weird submodes */ + + put_bits(&s->pb, 5, s->qscale); + } + + put_bits(&s->pb, 1, 0); /* no PEI */ + + if(s->h263_slice_structured){ + put_bits(&s->pb, 1, 1); + + assert(s->mb_x == 0 && s->mb_y == 0); + ff_h263_encode_mba(s); + + put_bits(&s->pb, 1, 1); + } + + if(s->h263_aic){ + s->y_dc_scale_table= + 
s->c_dc_scale_table= ff_aic_dc_scale_table; + }else{ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + } +} + +/** + * Encodes a group of blocks header. + */ +void h263_encode_gob_header(MpegEncContext * s, int mb_line) +{ + put_bits(&s->pb, 17, 1); /* GBSC */ + + if(s->h263_slice_structured){ + put_bits(&s->pb, 1, 1); + + ff_h263_encode_mba(s); + + if(s->mb_num > 1583) + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 5, s->qscale); /* GQUANT */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */ + }else{ + int gob_number= mb_line / s->gob_index; + + put_bits(&s->pb, 5, gob_number); /* GN */ + put_bits(&s->pb, 2, s->pict_type == I_TYPE); /* GFID */ + put_bits(&s->pb, 5, s->qscale); /* GQUANT */ + } +} + +static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int block_last_index, uint8_t scantable[64]){ + int last=0; + int j; + int rate=0; + + for(j=1; j<=block_last_index; j++){ + const int index= scantable[j]; + int level= block[index]; + if(level){ + level+= 64; + if((level&(~127)) == 0){ + if(jintra_ac_vlc_length [UNI_AC_ENC_INDEX(j-last-1, level)]; + else rate+= s->intra_ac_vlc_last_length[UNI_AC_ENC_INDEX(j-last-1, level)]; + }else + rate += s->ac_esc_length; + level-= 64; + + last= j; + } + } + + return rate; +} + +static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int dir[6], uint8_t *st[6], int zigzag_last_index[6]) +{ + int score= 0; + int i, n; + int8_t * const qscale_table= s->current_picture.qscale_table; + + memcpy(zigzag_last_index, s->block_last_index, sizeof(int)*6); + + for(n=0; n<6; n++){ + int16_t *ac_val, *ac_val1; + + score -= get_block_rate(s, block[n], s->block_last_index[n], s->intra_scantable.permutated); + + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + ac_val1= ac_val; + if(dir[n]){ + const int xy= s->mb_x + s->mb_y*s->mb_stride - s->mb_stride; + /* top prediction */ + ac_val-= s->block_wrap[n]*16; + if(s->mb_y==0 || s->qscale == 
qscale_table[xy] || n==2 || n==3){ + /* same qscale */ + for(i=1; i<8; i++){ + const int level= block[n][s->dsp.idct_permutation[i ]]; + block[n][s->dsp.idct_permutation[i ]] = level - ac_val[i+8]; + ac_val1[i ]= block[n][s->dsp.idct_permutation[i<<3]]; + ac_val1[i+8]= level; + } + }else{ + /* different qscale, we must rescale */ + for(i=1; i<8; i++){ + const int level= block[n][s->dsp.idct_permutation[i ]]; + block[n][s->dsp.idct_permutation[i ]] = level - ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); + ac_val1[i ]= block[n][s->dsp.idct_permutation[i<<3]]; + ac_val1[i+8]= level; + } + } + st[n]= s->intra_h_scantable.permutated; + }else{ + const int xy= s->mb_x-1 + s->mb_y*s->mb_stride; + /* left prediction */ + ac_val-= 16; + if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){ + /* same qscale */ + for(i=1; i<8; i++){ + const int level= block[n][s->dsp.idct_permutation[i<<3]]; + block[n][s->dsp.idct_permutation[i<<3]]= level - ac_val[i]; + ac_val1[i ]= level; + ac_val1[i+8]= block[n][s->dsp.idct_permutation[i ]]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1; i<8; i++){ + const int level= block[n][s->dsp.idct_permutation[i<<3]]; + block[n][s->dsp.idct_permutation[i<<3]]= level - ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); + ac_val1[i ]= level; + ac_val1[i+8]= block[n][s->dsp.idct_permutation[i ]]; + } + } + st[n]= s->intra_v_scantable.permutated; + } + + for(i=63; i>0; i--) //FIXME optimize + if(block[n][ st[n][i] ]) break; + s->block_last_index[n]= i; + + score += get_block_rate(s, block[n], s->block_last_index[n], st[n]); + } + + return score < 0; +} + +static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], int dir[6], uint8_t *st[6], int zigzag_last_index[6]) +{ + int i, n; + memcpy(s->block_last_index, zigzag_last_index, sizeof(int)*6); + + for(n=0; n<6; n++){ + int16_t *ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + + st[n]= s->intra_scantable.permutated; + if(dir[n]){ + /* top 
prediction */ + for(i=1; i<8; i++){ + block[n][s->dsp.idct_permutation[i ]] = ac_val[i+8]; + } + }else{ + /* left prediction */ + for(i=1; i<8; i++){ + block[n][s->dsp.idct_permutation[i<<3]]= ac_val[i ]; + } + } + } +} + +/** + * modify qscale so that encoding is acually possible in h263 (limit difference to -2..2) + */ +void ff_clean_h263_qscales(MpegEncContext *s){ + int i; + int8_t * const qscale_table= s->current_picture.qscale_table; + + for(i=1; imb_num; i++){ + if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2) + qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2; + } + for(i=s->mb_num-2; i>=0; i--){ + if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i+1] ] >2) + qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i+1] ]+2; + } + + if(s->codec_id != CODEC_ID_H263P){ + for(i=1; imb_num; i++){ + int mb_xy= s->mb_index2xy[i]; + + if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER; + } + } + } +} + +/** + * modify mb_type & qscale so that encoding is acually possible in mpeg4 + */ +void ff_clean_mpeg4_qscales(MpegEncContext *s){ + int i; + int8_t * const qscale_table= s->current_picture.qscale_table; + + ff_clean_h263_qscales(s); + + if(s->pict_type== B_TYPE){ + int odd=0; + /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than for the actual adaptive quantization */ + + for(i=0; imb_num; i++){ + int mb_xy= s->mb_index2xy[i]; + odd += qscale_table[mb_xy]&1; + } + + if(2*odd > s->mb_num) odd=1; + else odd=0; + + for(i=0; imb_num; i++){ + int mb_xy= s->mb_index2xy[i]; + if((qscale_table[mb_xy]&1) != odd) + qscale_table[mb_xy]++; + if(qscale_table[mb_xy] > 31) + qscale_table[mb_xy]= 31; + } + + for(i=1; imb_num; i++){ + int mb_xy= s->mb_index2xy[i]; + if(qscale_table[mb_xy] != 
qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){ + s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_DIRECT; + s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_BIDIR; + } + } + } +} + +#endif //CONFIG_ENCODERS + +#define tab_size ((signed)(sizeof(s->direct_scale_mv[0])/sizeof(int16_t))) +#define tab_bias (tab_size/2) + +static void ff_mpeg4_init_direct_mv(MpegEncContext *s){ + int i; + for(i=0; idirect_scale_mv[0][i] = (i-tab_bias)*s->pb_time/s->pp_time; + s->direct_scale_mv[1][i] = (i-tab_bias)*(s->pb_time-s->pp_time)/s->pp_time; + } +} + +static inline void ff_mpeg4_set_one_direct_mv(MpegEncContext *s, int mx, int my, int i){ + int xy= s->block_index[i]; + uint16_t time_pp= s->pp_time; + uint16_t time_pb= s->pb_time; + int p_mx, p_my; + + p_mx= s->next_picture.motion_val[0][xy][0]; + if((unsigned)(p_mx + tab_bias) < tab_size){ + s->mv[0][i][0] = s->direct_scale_mv[0][p_mx + tab_bias] + mx; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx + : s->direct_scale_mv[1][p_mx + tab_bias]; + }else{ + s->mv[0][i][0] = p_mx*time_pb/time_pp + mx; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx + : p_mx*(time_pb - time_pp)/time_pp; + } + p_my= s->next_picture.motion_val[0][xy][1]; + if((unsigned)(p_my + tab_bias) < tab_size){ + s->mv[0][i][1] = s->direct_scale_mv[0][p_my + tab_bias] + my; + s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my + : s->direct_scale_mv[1][p_my + tab_bias]; + }else{ + s->mv[0][i][1] = p_my*time_pb/time_pp + my; + s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my + : p_my*(time_pb - time_pp)/time_pp; + } +} + +#undef tab_size +#undef tab_bias + +/** + * + * @return the mb_type + */ +int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ + const int mb_index= s->mb_x + s->mb_y*s->mb_stride; + const int colocated_mb_type= s->next_picture.mb_type[mb_index]; + uint16_t time_pp= s->pp_time; + uint16_t time_pb= s->pb_time; + int i; + + //FIXME avoid divides + // try special case with shifts for 1 and 3 B-frames? 
+ + if(IS_8X8(colocated_mb_type)){ + s->mv_type = MV_TYPE_8X8; + for(i=0; i<4; i++){ + ff_mpeg4_set_one_direct_mv(s, mx, my, i); + } + return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1; + } else if(IS_INTERLACED(colocated_mb_type)){ + s->mv_type = MV_TYPE_FIELD; + for(i=0; i<2; i++){ + int field_select= s->next_picture.ref_index[0][s->block_index[2*i]]; + s->field_select[0][i]= field_select; + s->field_select[1][i]= i; + if(s->top_field_first){ + time_pp= s->pp_field_time - field_select + i; + time_pb= s->pb_field_time - field_select + i; + }else{ + time_pp= s->pp_field_time + field_select - i; + time_pb= s->pb_field_time + field_select - i; + } + s->mv[0][i][0] = s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx; + s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0] + : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp; + s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] + : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp; + } + return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED; + }else{ + ff_mpeg4_set_one_direct_mv(s, mx, my, 0); + s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->mv[0][0][0]; + s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->mv[0][0][1]; + s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = s->mv[1][0][0]; + s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = s->mv[1][0][1]; + if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample) + s->mv_type= MV_TYPE_16X16; + else + s->mv_type= MV_TYPE_8X8; + return MB_TYPE_DIRECT2 | MB_TYPE_16x16 | MB_TYPE_L0L1; //Note see prev line + } +} + +void ff_h263_update_motion_val(MpegEncContext * s){ + const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; + //FIXME a lot of that is only needed for !low_delay + const int wrap = s->b8_stride; + const int xy = s->block_index[0]; + + 
s->current_picture.mbskip_table[mb_xy]= s->mb_skipped; + + if(s->mv_type != MV_TYPE_8X8){ + int motion_x, motion_y; + if (s->mb_intra) { + motion_x = 0; + motion_y = 0; + } else if (s->mv_type == MV_TYPE_16X16) { + motion_x = s->mv[0][0][0]; + motion_y = s->mv[0][0][1]; + } else /*if (s->mv_type == MV_TYPE_FIELD)*/ { + int i; + motion_x = s->mv[0][0][0] + s->mv[0][1][0]; + motion_y = s->mv[0][0][1] + s->mv[0][1][1]; + motion_x = (motion_x>>1) | (motion_x&1); + for(i=0; i<2; i++){ + s->p_field_mv_table[i][0][mb_xy][0]= s->mv[0][i][0]; + s->p_field_mv_table[i][0][mb_xy][1]= s->mv[0][i][1]; + } + s->current_picture.ref_index[0][xy ]= + s->current_picture.ref_index[0][xy + 1]= s->field_select[0][0]; + s->current_picture.ref_index[0][xy + wrap ]= + s->current_picture.ref_index[0][xy + wrap + 1]= s->field_select[0][1]; + } + + /* no update if 8X8 because it has been done during parsing */ + s->current_picture.motion_val[0][xy][0] = motion_x; + s->current_picture.motion_val[0][xy][1] = motion_y; + s->current_picture.motion_val[0][xy + 1][0] = motion_x; + s->current_picture.motion_val[0][xy + 1][1] = motion_y; + s->current_picture.motion_val[0][xy + wrap][0] = motion_x; + s->current_picture.motion_val[0][xy + wrap][1] = motion_y; + s->current_picture.motion_val[0][xy + 1 + wrap][0] = motion_x; + s->current_picture.motion_val[0][xy + 1 + wrap][1] = motion_y; + } + + if(s->encoding){ //FIXME encoding MUST be cleaned up + if (s->mv_type == MV_TYPE_8X8) + s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8; + else if(s->mb_intra) + s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA; + else + s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16; + } +} + +#ifdef CONFIG_ENCODERS + +static inline int h263_get_motion_length(MpegEncContext * s, int val, int f_code){ + int l, bit_size, code; + + if (val == 0) { + return mvtab[0][1]; + } else { + bit_size = f_code - 1; + /* modulo encoding */ + l= INT_BIT - 6 - bit_size; + val = (val<>l; + val--; + code = (val >> 
bit_size) + 1; + + return mvtab[code][1] + 1 + bit_size; + } +} + +static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + skip_put_bits(&s->pb, + h263_get_motion_length(s, x, f_code) + +h263_get_motion_length(s, y, f_code)); + }else{ + ff_h263_encode_motion(s, x, f_code); + ff_h263_encode_motion(s, y, f_code); + } +} + +static inline int get_p_cbp(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y){ + int cbp, i; + + if(s->flags & CODEC_FLAG_CBP_RD){ + int best_cbpy_score= INT_MAX; + int best_cbpc_score= INT_MAX; + int cbpc = (-1), cbpy= (-1); + const int offset= (s->mv_type==MV_TYPE_16X16 ? 0 : 16) + (s->dquant ? 8 : 0); + const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); + + for(i=0; i<4; i++){ + int score= inter_MCBPC_bits[i + offset] * lambda; + if(i&1) score += s->coded_score[5]; + if(i&2) score += s->coded_score[4]; + + if(score < best_cbpc_score){ + best_cbpc_score= score; + cbpc= i; + } + } + + for(i=0; i<16; i++){ + int score= cbpy_tab[i ^ 0xF][1] * lambda; + if(i&1) score += s->coded_score[3]; + if(i&2) score += s->coded_score[2]; + if(i&4) score += s->coded_score[1]; + if(i&8) score += s->coded_score[0]; + + if(score < best_cbpy_score){ + best_cbpy_score= score; + cbpy= i; + } + } + cbp= cbpc + 4*cbpy; + if ((motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16){ + if(best_cbpy_score + best_cbpc_score + 2*lambda >= 0) + cbp= 0; + } + + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ + s->block_last_index[i]= -1; + memset(s->block[i], 0, sizeof(DCTELEM)*64); + } + } + }else{ + cbp= 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + } + return cbp; +} + +static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64], + int motion_x, int motion_y, int mb_type){ + int cbp=0, i; + + if(s->flags & CODEC_FLAG_CBP_RD){ + int score=0; + const int 
lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6); + + for(i=0; i<6; i++){ + if(s->coded_score[i] < 0){ + score += s->coded_score[i]; + cbp |= 1 << (5 - i); + } + } + + if(cbp){ + int zero_score= -6; + if ((motion_x | motion_y | s->dquant | mb_type) == 0){ + zero_score-= 4; //2*MV + mb_type + cbp bit + } + + zero_score*= lambda; + if(zero_score <= score){ + cbp=0; + } + } + + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ + s->block_last_index[i]= -1; + memset(s->block[i], 0, sizeof(DCTELEM)*64); + } + } + }else{ + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + } + return cbp; +} + +static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6], + uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){ + int i; + + if(scan_table){ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + for (i = 0; i < 6; i++) { + skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, intra_dc[i], scan_table[i])); + } + }else{ + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, intra_dc[i], scan_table[i], dc_pb, ac_pb); + } + } + }else{ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + for (i = 0; i < 6; i++) { + skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, 0, s->intra_scantable.permutated)); + } + }else{ + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, dc_pb, ac_pb); + } + } + } +} + +void mpeg4_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbpc, cbpy, pred_x, pred_y; + PutBitContext * const pb2 = s->data_partitioning ? &s->pb2 : &s->pb; + PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb; + PutBitContext * const dc_pb = s->data_partitioning && s->pict_type!=I_TYPE ? 
&s->pb2 : &s->pb; + const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0; + const int dquant_code[5]= {1,0,9,2,3}; + + // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); + if (!s->mb_intra) { + int i, cbp; + + if(s->pict_type==B_TYPE){ + static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */ + int mb_type= mb_type_table[s->mv_dir]; + + if(s->mb_x==0){ + for(i=0; i<2; i++){ + s->last_mv[i][0][0]= + s->last_mv[i][0][1]= + s->last_mv[i][1][0]= + s->last_mv[i][1][1]= 0; + } + } + + assert(s->dquant>=-2 && s->dquant<=2); + assert((s->dquant&1)==0); + assert(mb_type>=0); + + /* nothing to do if this MB was skipped in the next P Frame */ + if(s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]){ //FIXME avoid DCT & ... + s->skip_count++; + s->mv[0][0][0]= + s->mv[0][0][1]= + s->mv[1][0][0]= + s->mv[1][0][1]= 0; + s->mv_dir= MV_DIR_FORWARD; //doesn't matter + s->qscale -= s->dquant; +// s->mb_skipped=1; + + return; + } + + cbp= get_b_cbp(s, block, motion_x, motion_y, mb_type); + + if ((cbp | motion_x | motion_y | mb_type) ==0) { + /* direct MB with MV={0,0} */ + assert(s->dquant==0); + + put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */ + + if(interleaved_stats){ + s->misc_bits++; + s->last_bits++; + } + s->skip_count++; + return; + } + + put_bits(&s->pb, 1, 0); /* mb coded modb1=0 */ + put_bits(&s->pb, 1, cbp ? 
0 : 1); /* modb2 */ //FIXME merge + put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :) + if(cbp) put_bits(&s->pb, 6, cbp); + + if(cbp && mb_type){ + if(s->dquant) + put_bits(&s->pb, 2, (s->dquant>>2)+3); + else + put_bits(&s->pb, 1, 0); + }else + s->qscale -= s->dquant; + + if(!s->progressive_sequence){ + if(cbp) + put_bits(&s->pb, 1, s->interlaced_dct); + if(mb_type) // not direct mode + put_bits(&s->pb, 1, s->mv_type == MV_TYPE_FIELD); + } + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + if(mb_type == 0){ + assert(s->mv_dir & MV_DIRECT); + ff_h263_encode_motion_vector(s, motion_x, motion_y, 1); + s->b_count++; + s->f_count++; + }else{ + assert(mb_type > 0 && mb_type < 4); + if(s->mv_type != MV_TYPE_FIELD){ + if(s->mv_dir & MV_DIR_FORWARD){ + ff_h263_encode_motion_vector(s, s->mv[0][0][0] - s->last_mv[0][0][0], + s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0]; + s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1]; + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + ff_h263_encode_motion_vector(s, s->mv[1][0][0] - s->last_mv[1][0][0], + s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0]; + s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1]; + s->b_count++; + } + }else{ + if(s->mv_dir & MV_DIR_FORWARD){ + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + } + if(s->mv_dir & MV_DIR_BACKWARD){ + put_bits(&s->pb, 1, s->field_select[1][0]); + put_bits(&s->pb, 1, s->field_select[1][1]); + } + if(s->mv_dir & MV_DIR_FORWARD){ + for(i=0; i<2; i++){ + ff_h263_encode_motion_vector(s, s->mv[0][i][0] - s->last_mv[0][i][0] , + s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0]; + s->last_mv[0][i][1]= s->mv[0][i][1]*2; + } + s->f_count++; + } + if(s->mv_dir & MV_DIR_BACKWARD){ + for(i=0; i<2; i++){ + 
ff_h263_encode_motion_vector(s, s->mv[1][i][0] - s->last_mv[1][i][0] , + s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0]; + s->last_mv[1][i][1]= s->mv[1][i][1]*2; + } + s->b_count++; + } + } + } + + if(interleaved_stats){ + s->mv_bits+= get_bits_diff(s); + } + + mpeg4_encode_blocks(s, block, NULL, NULL, NULL, &s->pb); + + if(interleaved_stats){ + s->p_tex_bits+= get_bits_diff(s); + } + + }else{ /* s->pict_type==B_TYPE */ + cbp= get_p_cbp(s, block, motion_x, motion_y); + + if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) { + /* check if the B frames can skip it too, as we must skip it if we skip here + why didn't they just compress the skip-mb bits instead of reusing them ?! */ + if(s->max_b_frames>0){ + int i; + int x,y, offset; + uint8_t *p_pic; + + x= s->mb_x*16; + y= s->mb_y*16; + if(x+16 > s->width) x= s->width-16; + if(y+16 > s->height) y= s->height-16; + + offset= x + y*s->linesize; + p_pic= s->new_picture.data[0] + offset; + + s->mb_skipped=1; + for(i=0; imax_b_frames; i++){ + uint8_t *b_pic; + int diff; + Picture *pic= s->reordered_input_picture[i+1]; + + if(pic==NULL || pic->pict_type!=B_TYPE) break; + + b_pic= pic->data[0] + offset; + if(pic->type != FF_BUFFER_TYPE_SHARED) + b_pic+= INPLACE_OFFSET; + diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16); + if(diff>s->qscale*70){ //FIXME check that 70 is optimal + s->mb_skipped=0; + break; + } + } + }else + s->mb_skipped=1; + + if(s->mb_skipped==1){ + /* skip macroblock */ + put_bits(&s->pb, 1, 1); + + if(interleaved_stats){ + s->misc_bits++; + s->last_bits++; + } + s->skip_count++; + + return; + } + } + + put_bits(&s->pb, 1, 0); /* mb coded */ + cbpc = cbp & 3; + cbpy = cbp >> 2; + cbpy ^= 0xf; + if(s->mv_type==MV_TYPE_16X16){ + if(s->dquant) cbpc+= 8; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, 
dquant_code[s->dquant+2]); + + if(!s->progressive_sequence){ + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + put_bits(pb2, 1, 0); + } + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + /* motion vectors: 16x16 mode */ + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, s->f_code); + }else if(s->mv_type==MV_TYPE_FIELD){ + if(s->dquant) cbpc+= 8; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, dquant_code[s->dquant+2]); + + assert(!s->progressive_sequence); + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + put_bits(pb2, 1, 1); + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + /* motion vectors: 16x8 interlaced mode */ + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + pred_y /=2; + + put_bits(&s->pb, 1, s->field_select[0][0]); + put_bits(&s->pb, 1, s->field_select[0][1]); + + ff_h263_encode_motion_vector(s, s->mv[0][0][0] - pred_x, + s->mv[0][0][1] - pred_y, s->f_code); + ff_h263_encode_motion_vector(s, s->mv[0][1][0] - pred_x, + s->mv[0][1][1] - pred_y, s->f_code); + }else{ + assert(s->mv_type==MV_TYPE_8X8); + put_bits(&s->pb, + inter_MCBPC_bits[cbpc+16], + inter_MCBPC_code[cbpc+16]); + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + if(!s->progressive_sequence){ + if(cbp) + put_bits(pb2, 1, s->interlaced_dct); + } + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + for(i=0; i<4; i++){ + /* motion vectors: 8x8 mode*/ + h263_pred_motion(s, i, 0, &pred_x, &pred_y); + + ff_h263_encode_motion_vector(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x, + s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code); + } + } + + if(interleaved_stats){ + s->mv_bits+= get_bits_diff(s); + } + + mpeg4_encode_blocks(s, block, NULL, NULL, NULL, tex_pb); + + if(interleaved_stats){ + 
s->p_tex_bits+= get_bits_diff(s); + } + s->f_count++; + } + } else { + int cbp; + int dc_diff[6]; //dc values with the dc prediction subtracted + int dir[6]; //prediction direction + int zigzag_last_index[6]; + uint8_t *scan_table[6]; + int i; + + for(i=0; i<6; i++){ + dc_diff[i]= ff_mpeg4_pred_dc(s, i, block[i][0], &dir[i], 1); + } + + if(s->flags & CODEC_FLAG_AC_PRED){ + s->ac_pred= decide_ac_pred(s, block, dir, scan_table, zigzag_last_index); + if(!s->ac_pred) + restore_ac_coeffs(s, block, dir, scan_table, zigzag_last_index); + }else{ + for(i=0; i<6; i++) + scan_table[i]= s->intra_scantable.permutated; + } + + /* compute cbp */ + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 1) + cbp |= 1 << (5 - i); + } + + cbpc = cbp & 3; + if (s->pict_type == I_TYPE) { + if(s->dquant) cbpc+=4; + put_bits(&s->pb, + intra_MCBPC_bits[cbpc], + intra_MCBPC_code[cbpc]); + } else { + if(s->dquant) cbpc+=8; + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc + 4], + inter_MCBPC_code[cbpc + 4]); + } + put_bits(pb2, 1, s->ac_pred); + cbpy = cbp >> 2; + put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(dc_pb, 2, dquant_code[s->dquant+2]); + + if(!s->progressive_sequence){ + put_bits(dc_pb, 1, s->interlaced_dct); + } + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + mpeg4_encode_blocks(s, block, dc_diff, scan_table, dc_pb, tex_pb); + + if(interleaved_stats){ + s->i_tex_bits+= get_bits_diff(s); + } + s->i_count++; + + /* restore ac coeffs & last_index stuff if we messed them up with the prediction */ + if(s->ac_pred) + restore_ac_coeffs(s, block, dir, scan_table, zigzag_last_index); + } +} + +void h263_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y; + int16_t pred_dc; + int16_t rec_intradc[6]; + uint16_t *dc_ptr[6]; + const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1); + const int dquant_code[5]= 
{1,0,9,2,3}; + + //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); + if (!s->mb_intra) { + /* compute cbp */ + cbp= get_p_cbp(s, block, motion_x, motion_y); + + if ((cbp | motion_x | motion_y | s->dquant | (s->mv_type - MV_TYPE_16X16)) == 0) { + /* skip macroblock */ + put_bits(&s->pb, 1, 1); + if(interleaved_stats){ + s->misc_bits++; + s->last_bits++; + } + s->skip_count++; + + return; + } + put_bits(&s->pb, 1, 0); /* mb coded */ + + cbpc = cbp & 3; + cbpy = cbp >> 2; + if(s->alt_inter_vlc==0 || cbpc!=3) + cbpy ^= 0xF; + if(s->dquant) cbpc+= 8; + if(s->mv_type==MV_TYPE_16X16){ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(&s->pb, 2, dquant_code[s->dquant+2]); + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + /* motion vectors: 16x16 mode */ + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + + if (!s->umvplus) { + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, 1); + } + else { + h263p_encode_umotion(s, motion_x - pred_x); + h263p_encode_umotion(s, motion_y - pred_y); + if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1)) + /* To prevent Start Code emulation */ + put_bits(&s->pb,1,1); + } + }else{ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc+16], + inter_MCBPC_code[cbpc+16]); + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(&s->pb, 2, dquant_code[s->dquant+2]); + + if(interleaved_stats){ + s->misc_bits+= get_bits_diff(s); + } + + for(i=0; i<4; i++){ + /* motion vectors: 8x8 mode*/ + h263_pred_motion(s, i, 0, &pred_x, &pred_y); + + motion_x= s->current_picture.motion_val[0][ s->block_index[i] ][0]; + motion_y= s->current_picture.motion_val[0][ s->block_index[i] ][1]; + if (!s->umvplus) { + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, 1); + } + else { + h263p_encode_umotion(s, motion_x - pred_x); + h263p_encode_umotion(s, motion_y - pred_y); + if 
(((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1)) + /* To prevent Start Code emulation */ + put_bits(&s->pb,1,1); + } + } + } + + if(interleaved_stats){ + s->mv_bits+= get_bits_diff(s); + } + } else { + assert(s->mb_intra); + + cbp = 0; + if (s->h263_aic) { + /* Predict DC */ + for(i=0; i<6; i++) { + int16_t level = block[i][0]; + int scale; + + if(i<4) scale= s->y_dc_scale; + else scale= s->c_dc_scale; + + pred_dc = h263_pred_dc(s, i, &dc_ptr[i]); + level -= pred_dc; + /* Quant */ + if (level >= 0) + level = (level + (scale>>1))/scale; + else + level = (level - (scale>>1))/scale; + + /* AIC can change CBP */ + if (level == 0 && s->block_last_index[i] == 0) + s->block_last_index[i] = -1; + + if(!s->modified_quant){ + if (level < -127) + level = -127; + else if (level > 127) + level = 127; + } + + block[i][0] = level; + /* Reconstruction */ + rec_intradc[i] = scale*level + pred_dc; + /* Oddify */ + rec_intradc[i] |= 1; + //if ((rec_intradc[i] % 2) == 0) + // rec_intradc[i]++; + /* Clipping */ + if (rec_intradc[i] < 0) + rec_intradc[i] = 0; + else if (rec_intradc[i] > 2047) + rec_intradc[i] = 2047; + + /* Update AC/DC tables */ + *dc_ptr[i] = rec_intradc[i]; + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + }else{ + for(i=0; i<6; i++) { + /* compute cbp */ + if (s->block_last_index[i] >= 1) + cbp |= 1 << (5 - i); + } + } + + cbpc = cbp & 3; + if (s->pict_type == I_TYPE) { + if(s->dquant) cbpc+=4; + put_bits(&s->pb, + intra_MCBPC_bits[cbpc], + intra_MCBPC_code[cbpc]); + } else { + if(s->dquant) cbpc+=8; + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc + 4], + inter_MCBPC_code[cbpc + 4]); + } + if (s->h263_aic) { + /* XXX: currently, we do not try to use ac prediction */ + put_bits(&s->pb, 1, 0); /* no AC prediction */ + } + cbpy = cbp >> 2; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(&s->pb, 2, dquant_code[s->dquant+2]); + + if(interleaved_stats){ + s->misc_bits+= 
get_bits_diff(s); + } + } + + for(i=0; i<6; i++) { + /* encode each block */ + h263_encode_block(s, block[i], i); + + /* Update INTRADC for decoding */ + if (s->h263_aic && s->mb_intra) { + block[i][0] = rec_intradc[i]; + + } + } + + if(interleaved_stats){ + if (!s->mb_intra) { + s->p_tex_bits+= get_bits_diff(s); + s->f_count++; + }else{ + s->i_tex_bits+= get_bits_diff(s); + s->i_count++; + } + } +} +#endif + +void ff_h263_loop_filter(MpegEncContext * s){ + int qp_c; + const int linesize = s->linesize; + const int uvlinesize= s->uvlinesize; + const int xy = s->mb_y * s->mb_stride + s->mb_x; + uint8_t *dest_y = s->dest[0]; + uint8_t *dest_cb= s->dest[1]; + uint8_t *dest_cr= s->dest[2]; + +// if(s->pict_type==B_TYPE && !s->readable) return; + + /* + Diag Top + Left Center + */ + if(!IS_SKIP(s->current_picture.mb_type[xy])){ + qp_c= s->qscale; + s->dsp.h263_v_loop_filter(dest_y+8*linesize , linesize, qp_c); + s->dsp.h263_v_loop_filter(dest_y+8*linesize+8, linesize, qp_c); + }else + qp_c= 0; + + if(s->mb_y){ + int qp_dt, qp_t, qp_tc; + + if(IS_SKIP(s->current_picture.mb_type[xy-s->mb_stride])) + qp_t=0; + else + qp_t= s->current_picture.qscale_table[xy-s->mb_stride]; + + if(qp_c) + qp_tc= qp_c; + else + qp_tc= qp_t; + + if(qp_tc){ + const int chroma_qp= s->chroma_qscale_table[qp_tc]; + s->dsp.h263_v_loop_filter(dest_y , linesize, qp_tc); + s->dsp.h263_v_loop_filter(dest_y+8, linesize, qp_tc); + + s->dsp.h263_v_loop_filter(dest_cb , uvlinesize, chroma_qp); + s->dsp.h263_v_loop_filter(dest_cr , uvlinesize, chroma_qp); + } + + if(qp_t) + s->dsp.h263_h_loop_filter(dest_y-8*linesize+8 , linesize, qp_t); + + if(s->mb_x){ + if(qp_t || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride])) + qp_dt= qp_t; + else + qp_dt= s->current_picture.qscale_table[xy-1-s->mb_stride]; + + if(qp_dt){ + const int chroma_qp= s->chroma_qscale_table[qp_dt]; + s->dsp.h263_h_loop_filter(dest_y -8*linesize , linesize, qp_dt); + s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, 
chroma_qp); + s->dsp.h263_h_loop_filter(dest_cr-8*uvlinesize, uvlinesize, chroma_qp); + } + } + } + + if(qp_c){ + s->dsp.h263_h_loop_filter(dest_y +8, linesize, qp_c); + if(s->mb_y + 1 == s->mb_height) + s->dsp.h263_h_loop_filter(dest_y+8*linesize+8, linesize, qp_c); + } + + if(s->mb_x){ + int qp_lc; + if(qp_c || IS_SKIP(s->current_picture.mb_type[xy-1])) + qp_lc= qp_c; + else + qp_lc= s->current_picture.qscale_table[xy-1]; + + if(qp_lc){ + s->dsp.h263_h_loop_filter(dest_y, linesize, qp_lc); + if(s->mb_y + 1 == s->mb_height){ + const int chroma_qp= s->chroma_qscale_table[qp_lc]; + s->dsp.h263_h_loop_filter(dest_y +8* linesize, linesize, qp_lc); + s->dsp.h263_h_loop_filter(dest_cb , uvlinesize, chroma_qp); + s->dsp.h263_h_loop_filter(dest_cr , uvlinesize, chroma_qp); + } + } + } +} + +static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr) +{ + int x, y, wrap, a, c, pred_dc, scale; + int16_t *dc_val; + + /* find prediction */ + if (n < 4) { + x = 2 * s->mb_x + (n & 1); + y = 2 * s->mb_y + ((n & 2) >> 1); + wrap = s->b8_stride; + dc_val = s->dc_val[0]; + scale = s->y_dc_scale; + } else { + x = s->mb_x; + y = s->mb_y; + wrap = s->mb_stride; + dc_val = s->dc_val[n - 4 + 1]; + scale = s->c_dc_scale; + } + /* B C + * A X + */ + a = dc_val[(x - 1) + (y) * wrap]; + c = dc_val[(x) + (y - 1) * wrap]; + + /* No prediction outside GOB boundary */ + if(s->first_slice_line && n!=3){ + if(n!=2) c= 1024; + if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024; + } + pred_dc = 1024; + /* just DC prediction */ + if (a != 1024 && c != 1024) + pred_dc = (a + c) >> 1; + else if (a != 1024) + pred_dc = a; + else + pred_dc = c; + + /* we assume pred is positive */ + //pred_dc = (pred_dc + (scale >> 1)) / scale; + *dc_val_ptr = &dc_val[x + y * wrap]; + return pred_dc; +} + +static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n) +{ + int x, y, wrap, a, c, pred_dc, scale, i; + int16_t *dc_val, *ac_val, *ac_val1; + + /* find prediction */ + if (n < 4) { + x = 2 * 
s->mb_x + (n & 1); + y = 2 * s->mb_y + (n>> 1); + wrap = s->b8_stride; + dc_val = s->dc_val[0]; + ac_val = s->ac_val[0][0]; + scale = s->y_dc_scale; + } else { + x = s->mb_x; + y = s->mb_y; + wrap = s->mb_stride; + dc_val = s->dc_val[n - 4 + 1]; + ac_val = s->ac_val[n - 4 + 1][0]; + scale = s->c_dc_scale; + } + + ac_val += ((y) * wrap + (x)) * 16; + ac_val1 = ac_val; + + /* B C + * A X + */ + a = dc_val[(x - 1) + (y) * wrap]; + c = dc_val[(x) + (y - 1) * wrap]; + + /* No prediction outside GOB boundary */ + if(s->first_slice_line && n!=3){ + if(n!=2) c= 1024; + if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024; + } + + if (s->ac_pred) { + pred_dc = 1024; + if (s->h263_aic_dir) { + /* left prediction */ + if (a != 1024) { + ac_val -= 16; + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i<<3]] += ac_val[i]; + } + pred_dc = a; + } + } else { + /* top prediction */ + if (c != 1024) { + ac_val -= 16 * wrap; + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i ]] += ac_val[i + 8]; + } + pred_dc = c; + } + } + } else { + /* just DC prediction */ + if (a != 1024 && c != 1024) + pred_dc = (a + c) >> 1; + else if (a != 1024) + pred_dc = a; + else + pred_dc = c; + } + + /* we assume pred is positive */ + block[0]=block[0]*scale + pred_dc; + + if (block[0] < 0) + block[0] = 0; + else + block[0] |= 1; + + /* Update AC/DC tables */ + dc_val[(x) + (y) * wrap] = block[0]; + + /* left copy */ + for(i=1;i<8;i++) + ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]]; + /* top copy */ + for(i=1;i<8;i++) + ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]]; +} + +int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir, + int *px, int *py) +{ + int wrap; + int16_t *A, *B, *C, (*mot_val)[2]; + static const int off[4]= {2, 1, 1, -1}; + + wrap = s->b8_stride; + mot_val = s->current_picture.motion_val[dir] + s->block_index[block]; + + A = mot_val[ - 1]; + /* special case for first (slice) line */ + if (s->first_slice_line && block<3) { + // we can't just change some MVs to 
simulate that as we need them for the B frames (and ME) + // and if we ever support non rectangular objects than we need to do a few ifs here anyway :( + if(block==0){ //most common case + if(s->mb_x == s->resync_mb_x){ //rare + *px= *py = 0; + }else if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare + C = mot_val[off[block] - wrap]; + if(s->mb_x==0){ + *px = C[0]; + *py = C[1]; + }else{ + *px = mid_pred(A[0], 0, C[0]); + *py = mid_pred(A[1], 0, C[1]); + } + }else{ + *px = A[0]; + *py = A[1]; + } + }else if(block==1){ + if(s->mb_x + 1 == s->resync_mb_x && s->h263_pred){ //rare + C = mot_val[off[block] - wrap]; + *px = mid_pred(A[0], 0, C[0]); + *py = mid_pred(A[1], 0, C[1]); + }else{ + *px = A[0]; + *py = A[1]; + } + }else{ /* block==2*/ + B = mot_val[ - wrap]; + C = mot_val[off[block] - wrap]; + if(s->mb_x == s->resync_mb_x) //rare + A[0]=A[1]=0; + + *px = mid_pred(A[0], B[0], C[0]); + *py = mid_pred(A[1], B[1], C[1]); + } + } else { + B = mot_val[ - wrap]; + C = mot_val[off[block] - wrap]; + *px = mid_pred(A[0], B[0], C[0]); + *py = mid_pred(A[1], B[1], C[1]); + } + return *mot_val; +} + +#ifdef CONFIG_ENCODERS +void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code) +{ + int range, l, bit_size, sign, code, bits; + + if (val == 0) { + /* zero vector */ + code = 0; + put_bits(&s->pb, mvtab[code][1], mvtab[code][0]); + } else { + bit_size = f_code - 1; + range = 1 << bit_size; + /* modulo encoding */ + l= INT_BIT - 6 - bit_size; + val = (val<>l; + sign = val>>31; + val= (val^sign)-sign; + sign&=1; + + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + + put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); + if (bit_size > 0) { + put_bits(&s->pb, bit_size, bits); + } + } +} + +/* Encode MV differences on H.263+ with Unrestricted MV mode */ +static void h263p_encode_umotion(MpegEncContext * s, int val) +{ + short sval = 0; + short i = 0; + short n_bits = 0; + short temp_val; + int code = 0; + int tcode; + + if ( val 
== 0) + put_bits(&s->pb, 1, 1); + else if (val == 1) + put_bits(&s->pb, 3, 0); + else if (val == -1) + put_bits(&s->pb, 3, 2); + else { + + sval = ((val < 0) ? (short)(-val):(short)val); + temp_val = sval; + + while (temp_val != 0) { + temp_val = temp_val >> 1; + n_bits++; + } + + i = n_bits - 1; + while (i > 0) { + tcode = (sval & (1 << (i-1))) >> (i-1); + tcode = (tcode << 1) | 1; + code = (code << 2) | tcode; + i--; + } + code = ((code << 1) | (val < 0)) << 1; + put_bits(&s->pb, (2*n_bits)+1, code); + //printf("\nVal = %d\tCode = %d", sval, code); + } +} + +static void init_mv_penalty_and_fcode(MpegEncContext *s) +{ + int f_code; + int mv; + + if(mv_penalty==NULL) + mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) ); + + for(f_code=1; f_code<=MAX_FCODE; f_code++){ + for(mv=-MAX_MV; mv<=MAX_MV; mv++){ + int len; + + if(mv==0) len= mvtab[0][1]; + else{ + int val, bit_size, range, code; + + bit_size = f_code - 1; + range = 1 << bit_size; + + val=mv; + if (val < 0) + val = -val; + val--; + code = (val >> bit_size) + 1; + if(code<33){ + len= mvtab[code][1] + 1 + bit_size; + }else{ + len= mvtab[32][1] + av_log2(code>>5) + 2 + bit_size; + } + } + + mv_penalty[f_code][mv+MAX_MV]= len; + } + } + + for(f_code=MAX_FCODE; f_code>0; f_code--){ + for(mv=-(16<>= 1; + size++; + } + + if (level < 0) + l= (-level) ^ ((1 << size) - 1); + else + l= level; + + /* luminance */ + uni_code= DCtab_lum[size][0]; + uni_len = DCtab_lum[size][1]; + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + uni_DCtab_lum_bits[level+256]= uni_code; + uni_DCtab_lum_len [level+256]= uni_len; + + /* chrominance */ + uni_code= DCtab_chrom[size][0]; + uni_len = DCtab_chrom[size][1]; + + if (size > 0) { + uni_code<<=size; uni_code|=l; + uni_len+=size; + if (size > 8){ + uni_code<<=1; uni_code|=1; + uni_len++; + } + } + uni_DCtab_chrom_bits[level+256]= uni_code; + uni_DCtab_chrom_len [level+256]= uni_len; 
+ + } +} + +#endif //CONFIG_ENCODERS + +#ifdef CONFIG_ENCODERS +static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){ + int slevel, run, last; + + assert(MAX_LEVEL >= 64); + assert(MAX_RUN >= 63); + + for(slevel=-64; slevel<64; slevel++){ + if(slevel==0) continue; + for(run=0; run<64; run++){ + for(last=0; last<=1; last++){ + const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64); + int level= slevel < 0 ? -slevel : slevel; + int sign= slevel < 0 ? 1 : 0; + int bits, len, code; + int level1, run1; + + len_tab[index]= 100; + + /* ESC0 */ + code= get_rl_index(rl, last, run, level); + bits= rl->table_vlc[code][0]; + len= rl->table_vlc[code][1]; + bits=bits*2+sign; len++; + + if(code!=rl->n && len < len_tab[index]){ + bits_tab[index]= bits; + len_tab [index]= len; + } +#if 1 + /* ESC1 */ + bits= rl->table_vlc[rl->n][0]; + len= rl->table_vlc[rl->n][1]; + bits=bits*2; len++; //esc1 + level1= level - rl->max_level[last][run]; + if(level1>0){ + code= get_rl_index(rl, last, run, level1); + bits<<= rl->table_vlc[code][1]; + len += rl->table_vlc[code][1]; + bits += rl->table_vlc[code][0]; + bits=bits*2+sign; len++; + + if(code!=rl->n && len < len_tab[index]){ + bits_tab[index]= bits; + len_tab [index]= len; + } + } +#endif +#if 1 + /* ESC2 */ + bits= rl->table_vlc[rl->n][0]; + len= rl->table_vlc[rl->n][1]; + bits=bits*4+2; len+=2; //esc2 + run1 = run - rl->max_run[last][level] - 1; + if(run1>=0){ + code= get_rl_index(rl, last, run1, level); + bits<<= rl->table_vlc[code][1]; + len += rl->table_vlc[code][1]; + bits += rl->table_vlc[code][0]; + bits=bits*2+sign; len++; + + if(code!=rl->n && len < len_tab[index]){ + bits_tab[index]= bits; + len_tab [index]= len; + } + } +#endif + /* ESC3 */ + bits= rl->table_vlc[rl->n][0]; + len = rl->table_vlc[rl->n][1]; + bits=bits*4+3; len+=2; //esc3 + bits=bits*2+last; len++; + bits=bits*64+run; len+=6; + bits=bits*2+1; len++; //marker + bits=bits*4096+(slevel&0xfff); len+=12; + bits=bits*2+1; len++; 
//marker + + if(len < len_tab[index]){ + bits_tab[index]= bits; + len_tab [index]= len; + } + } + } + } +} + +static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){ + int slevel, run, last; + + assert(MAX_LEVEL >= 64); + assert(MAX_RUN >= 63); + + for(slevel=-64; slevel<64; slevel++){ + if(slevel==0) continue; + for(run=0; run<64; run++){ + for(last=0; last<=1; last++){ + const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64); + int level= slevel < 0 ? -slevel : slevel; + int sign= slevel < 0 ? 1 : 0; + int bits, len, code; + + len_tab[index]= 100; + + /* ESC0 */ + code= get_rl_index(rl, last, run, level); + bits= rl->table_vlc[code][0]; + len= rl->table_vlc[code][1]; + bits=bits*2+sign; len++; + + if(code!=rl->n && len < len_tab[index]){ + if(bits_tab) bits_tab[index]= bits; + len_tab [index]= len; + } + /* ESC */ + bits= rl->table_vlc[rl->n][0]; + len = rl->table_vlc[rl->n][1]; + bits=bits*2+last; len++; + bits=bits*64+run; len+=6; + bits=bits*256+(level&0xff); len+=8; + + if(len < len_tab[index]){ + if(bits_tab) bits_tab[index]= bits; + len_tab [index]= len; + } + } + } + } +} + +void h263_encode_init(MpegEncContext *s) +{ + static int done = 0; + + if (!done) { + done = 1; + + init_uni_dc_tab(); + + init_rl(&rl_inter, 1); + init_rl(&rl_intra, 1); + init_rl(&rl_intra_aic, 1); + + init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len); + init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len); + + init_uni_h263_rl_tab(&rl_intra_aic, NULL, uni_h263_intra_aic_rl_len); + init_uni_h263_rl_tab(&rl_inter , NULL, uni_h263_inter_rl_len); + + init_mv_penalty_and_fcode(s); + } + s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p + + s->intra_ac_vlc_length =s->inter_ac_vlc_length = uni_h263_inter_rl_len; + s->intra_ac_vlc_last_length=s->inter_ac_vlc_last_length= uni_h263_inter_rl_len + 128*64; + if(s->h263_aic){ + s->intra_ac_vlc_length = uni_h263_intra_aic_rl_len; + 
s->intra_ac_vlc_last_length= uni_h263_intra_aic_rl_len + 128*64; + } + s->ac_esc_length= 7+1+6+8; + + // use fcodes >1 only for mpeg4 & h263 & h263p FIXME + switch(s->codec_id){ + case CODEC_ID_MPEG4: + s->fcode_tab= fcode_tab; + s->min_qcoeff= -2048; + s->max_qcoeff= 2047; + s->intra_ac_vlc_length = uni_mpeg4_intra_rl_len; + s->intra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64; + s->inter_ac_vlc_length = uni_mpeg4_inter_rl_len; + s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64; + s->luma_dc_vlc_length= uni_DCtab_lum_len; + s->chroma_dc_vlc_length= uni_DCtab_chrom_len; + s->ac_esc_length= 7+2+1+6+1+12+1; + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; + s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; + + if(s->flags & CODEC_FLAG_GLOBAL_HEADER){ + + s->avctx->extradata= av_malloc(1024); + init_put_bits(&s->pb, s->avctx->extradata, 1024); + + if(!(s->workaround_bugs & FF_BUG_MS)) + mpeg4_encode_visual_object_header(s); + mpeg4_encode_vol_header(s, 0, 0); + +// ff_mpeg4_stuffing(&s->pb); ? + flush_put_bits(&s->pb); + s->avctx->extradata_size= (put_bits_count(&s->pb)+7)>>3; + } + + break; + case CODEC_ID_H263P: + if(s->umvplus) + s->fcode_tab= umv_fcode_tab; + if(s->modified_quant){ + s->min_qcoeff= -2047; + s->max_qcoeff= 2047; + }else{ + s->min_qcoeff= -127; + s->max_qcoeff= 127; + } + break; + //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later + case CODEC_ID_FLV1: + if (s->h263_flv > 1) { + s->min_qcoeff= -1023; + s->max_qcoeff= 1023; + } else { + s->min_qcoeff= -127; + s->max_qcoeff= 127; + } + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + break; + default: //nothing needed - default table already set in mpegvideo.c + s->min_qcoeff= -127; + s->max_qcoeff= 127; + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + } +} + +/** + * encodes a 8x8 block. 
+ * @param block the 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) +{ + int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code; + RLTable *rl; + + rl = &rl_inter; + if (s->mb_intra && !s->h263_aic) { + /* DC coef */ + level = block[0]; + /* 255 cannot be represented, so we clamp */ + if (level > 254) { + level = 254; + block[0] = 254; + } + /* 0 cannot be represented also */ + else if (level < 1) { + level = 1; + block[0] = 1; + } + if (level == 128) //FIXME check rv10 + put_bits(&s->pb, 8, 0xff); + else + put_bits(&s->pb, 8, level); + i = 1; + } else { + i = 0; + if (s->h263_aic && s->mb_intra) + rl = &rl_intra_aic; + + if(s->alt_inter_vlc && !s->mb_intra){ + int aic_vlc_bits=0; + int inter_vlc_bits=0; + int wrong_pos=-1; + int aic_code; + + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + + if(level<0) level= -level; + + code = get_rl_index(rl, last, run, level); + aic_code = get_rl_index(&rl_intra_aic, last, run, level); + inter_vlc_bits += rl->table_vlc[code][1]+1; + aic_vlc_bits += rl_intra_aic.table_vlc[aic_code][1]+1; + + if (code == rl->n) { + inter_vlc_bits += 1+6+8-1; + } + if (aic_code == rl_intra_aic.n) { + aic_vlc_bits += 1+6+8-1; + wrong_pos += run + 1; + }else + wrong_pos += wrong_run[aic_code]; + last_non_zero = i; + } + } + i = 0; + if(aic_vlc_bits < inter_vlc_bits && wrong_pos > 63) + rl = &rl_intra_aic; + } + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = 
get_rl_index(rl, last, run, level); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + if(s->h263_flv <= 1){ + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + + assert(slevel != 0); + + if(level < 128) + put_bits(&s->pb, 8, slevel & 0xff); + else{ + put_bits(&s->pb, 8, 128); + put_bits(&s->pb, 5, slevel & 0x1f); + put_bits(&s->pb, 6, (slevel>>5)&0x3f); + } + }else{ + if(level < 64) { // 7-bit level + put_bits(&s->pb, 1, 0); + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + + put_bits(&s->pb, 7, slevel & 0x7f); + } else { + /* 11-bit level */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + + put_bits(&s->pb, 11, slevel & 0x7ff); + } + } + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } +} +#endif + +#ifdef CONFIG_ENCODERS + +/***************************************************/ +/** + * add mpeg4 stuffing bits (01...1) + */ +void ff_mpeg4_stuffing(PutBitContext * pbc) +{ + int length; + put_bits(pbc, 1, 0); + length= (-put_bits_count(pbc))&7; + if(length) put_bits(pbc, length, (1<current_picture_ptr->pts != AV_NOPTS_VALUE); + s->time= s->current_picture_ptr->pts*s->avctx->time_base.num; + + time_div= s->time/s->avctx->time_base.den; + time_mod= s->time%s->avctx->time_base.den; + + if(s->pict_type==B_TYPE){ + s->pb_time= s->pp_time - (s->last_non_b_time - s->time); + assert(s->pb_time > 0 && s->pb_time < s->pp_time); + ff_mpeg4_init_direct_mv(s); + }else{ + s->last_time_base= s->time_base; + s->time_base= time_div; + s->pp_time= s->time - s->last_non_b_time; + s->last_non_b_time= s->time; + assert(picture_number==0 || s->pp_time > 0); + } +} + +static void mpeg4_encode_gop_header(MpegEncContext * s){ + int hours, minutes, seconds; + int64_t time; + + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, GOP_STARTCODE); + + time= s->current_picture_ptr->pts; + if(s->reordered_input_picture[1]) + time= FFMIN(time, s->reordered_input_picture[1]->pts); + 
time= time*s->avctx->time_base.num; + + seconds= time/s->avctx->time_base.den; + minutes= seconds/60; seconds %= 60; + hours= minutes/60; minutes %= 60; + hours%=24; + + put_bits(&s->pb, 5, hours); + put_bits(&s->pb, 6, minutes); + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 6, seconds); + + put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP)); + put_bits(&s->pb, 1, 0); //broken link == NO + + s->last_time_base= time / s->avctx->time_base.den; + + ff_mpeg4_stuffing(&s->pb); +} + +static void mpeg4_encode_visual_object_header(MpegEncContext * s){ + int profile_and_level_indication; + int vo_ver_id; + + if(s->avctx->profile != FF_PROFILE_UNKNOWN){ + profile_and_level_indication = s->avctx->profile << 4; + }else if(s->max_b_frames || s->quarter_sample){ + profile_and_level_indication= 0xF0; // adv simple + }else{ + profile_and_level_indication= 0x00; // simple + } + + if(s->avctx->level != FF_LEVEL_UNKNOWN){ + profile_and_level_indication |= s->avctx->level; + }else{ + profile_and_level_indication |= 1; //level 1 + } + + if(profile_and_level_indication>>4 == 0xF){ + vo_ver_id= 5; + }else{ + vo_ver_id= 1; + } + + //FIXME levels + + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, VOS_STARTCODE); + + put_bits(&s->pb, 8, profile_and_level_indication); + + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, VISUAL_OBJ_STARTCODE); + + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 4, vo_ver_id); + put_bits(&s->pb, 3, 1); //priority + + put_bits(&s->pb, 4, 1); //visual obj type== video obj + + put_bits(&s->pb, 1, 0); //video signal type == no clue //FIXME + + ff_mpeg4_stuffing(&s->pb); +} + +static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number) +{ + int vo_ver_id; + + if(s->max_b_frames || s->quarter_sample){ + vo_ver_id= 5; + s->vo_type= ADV_SIMPLE_VO_TYPE; + }else{ + vo_ver_id= 1; + s->vo_type= SIMPLE_VO_TYPE; + } + + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, 0x100 + vo_number); /* video obj */ + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 
16, 0x120 + vol_number); /* video obj layer */ + + put_bits(&s->pb, 1, 0); /* random access vol */ + put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */ + if(s->workaround_bugs & FF_BUG_MS) { + put_bits(&s->pb, 1, 0); /* is obj layer id= no */ + } else { + put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ + put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ + put_bits(&s->pb, 3, 1); /* is obj layer priority */ + } + + aspect_to_info(s, s->avctx->sample_aspect_ratio); + + put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */ + if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){ + put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num); + put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); + } + + if(s->workaround_bugs & FF_BUG_MS) { // + put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */ + } else { + put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ + put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ + put_bits(&s->pb, 1, s->low_delay); + put_bits(&s->pb, 1, 0); /* vbv parameters= no */ + } + + put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ + put_bits(&s->pb, 1, 1); /* marker bit */ + + put_bits(&s->pb, 16, s->avctx->time_base.den); + if (s->time_increment_bits < 1) + s->time_increment_bits = 1; + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 1, 0); /* fixed vop rate=no */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 13, s->width); /* vol width */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 13, s->height); /* vol height */ + put_bits(&s->pb, 1, 1); /* marker bit */ + put_bits(&s->pb, 1, s->progressive_sequence ? 
0 : 1); + put_bits(&s->pb, 1, 1); /* obmc disable */ + if (vo_ver_id == 1) { + put_bits(&s->pb, 1, s->vol_sprite_usage); /* sprite enable */ + }else{ + put_bits(&s->pb, 2, s->vol_sprite_usage); /* sprite enable */ + } + + put_bits(&s->pb, 1, 0); /* not 8 bit == false */ + put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/ + + if(s->mpeg_quant){ + ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix); + ff_write_quant_matrix(&s->pb, s->avctx->inter_matrix); + } + + if (vo_ver_id != 1) + put_bits(&s->pb, 1, s->quarter_sample); + put_bits(&s->pb, 1, 1); /* complexity estimation disable */ + s->resync_marker= s->rtp_mode; + put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */ + put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0); + if(s->data_partitioning){ + put_bits(&s->pb, 1, 0); /* no rvlc */ + } + + if (vo_ver_id != 1){ + put_bits(&s->pb, 1, 0); /* newpred */ + put_bits(&s->pb, 1, 0); /* reduced res vop */ + } + put_bits(&s->pb, 1, 0); /* scalability */ + + ff_mpeg4_stuffing(&s->pb); + + /* user data */ + if(!(s->flags & CODEC_FLAG_BITEXACT)){ + put_bits(&s->pb, 16, 0); + put_bits(&s->pb, 16, 0x1B2); /* user_data */ + ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0); + } +} + +/* write mpeg4 VOP header */ +void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) +{ + int time_incr; + int time_div, time_mod; + + if(s->pict_type==I_TYPE){ + if(!(s->flags&CODEC_FLAG_GLOBAL_HEADER)){ + if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) //HACK, the reference sw is buggy + mpeg4_encode_visual_object_header(s); + if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT || picture_number==0) //HACK, the reference sw is buggy + mpeg4_encode_vol_header(s, 0, 0); + } + if(!(s->workaround_bugs & FF_BUG_MS)) + mpeg4_encode_gop_header(s); + } + + s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE; + +//printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE); + + put_bits(&s->pb, 
16, 0); /* vop header */ + put_bits(&s->pb, 16, VOP_STARTCODE); /* vop header */ + put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ + + assert(s->time>=0); + time_div= s->time/s->avctx->time_base.den; + time_mod= s->time%s->avctx->time_base.den; + time_incr= time_div - s->last_time_base; + assert(time_incr >= 0); + while(time_incr--) + put_bits(&s->pb, 1, 1); + + put_bits(&s->pb, 1, 0); + + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */ + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 1, 1); /* vop coded */ + if ( s->pict_type == P_TYPE + || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { + put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ + } + put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ + if(!s->progressive_sequence){ + put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first); + put_bits(&s->pb, 1, s->alternate_scan); + } + //FIXME sprite stuff + + put_bits(&s->pb, 5, s->qscale); + + if (s->pict_type != I_TYPE) + put_bits(&s->pb, 3, s->f_code); /* fcode_for */ + if (s->pict_type == B_TYPE) + put_bits(&s->pb, 3, s->b_code); /* fcode_back */ + // printf("****frame %d\n", picture_number); +} + +#endif //CONFIG_ENCODERS + +/** + * set qscale and update qscale dependant variables. + */ +void ff_set_qscale(MpegEncContext * s, int qscale) +{ + if (qscale < 1) + qscale = 1; + else if (qscale > 31) + qscale = 31; + + s->qscale = qscale; + s->chroma_qscale= s->chroma_qscale_table[qscale]; + + s->y_dc_scale= s->y_dc_scale_table[ qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ]; +} + +/** + * predicts the dc. 
+ * encoding quantized level -> quantized diff + * decoding quantized diff -> quantized level + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir_ptr pointer to an integer where the prediction direction will be stored + */ +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding) +{ + int a, b, c, wrap, pred, scale, ret; + uint16_t *dc_val; + + /* find prediction */ + if (n < 4) { + scale = s->y_dc_scale; + } else { + scale = s->c_dc_scale; + } + if(IS_3IV1) + scale= 8; + + wrap= s->block_wrap[n]; + dc_val = s->dc_val[0] + s->block_index[n]; + + /* B C + * A X + */ + a = dc_val[ - 1]; + b = dc_val[ - 1 - wrap]; + c = dc_val[ - wrap]; + + /* outside slice handling (we can't do that by memset as we need the dc for error resilience) */ + if(s->first_slice_line && n!=3){ + if(n!=2) b=c= 1024; + if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024; + } + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){ + if(n==0 || n==4 || n==5) + b=1024; + } + + if (abs(a - b) < abs(b - c)) { + pred = c; + *dir_ptr = 1; /* top */ + } else { + pred = a; + *dir_ptr = 0; /* left */ + } + /* we assume pred is positive */ + pred = FASTDIV((pred + (scale >> 1)), scale); + + if(encoding){ + ret = level - pred; + }else{ + level += pred; + ret= level; + if(s->error_resilience>=3){ + if(level<0){ + av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + if(level*scale > 2048 + scale){ + av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + } + } + level *=scale; + if(level&(~2047)){ + if(level<0) + level=0; + else if(!(s->workaround_bugs&FF_BUG_DC_CLIP)) + level=2047; + } + dc_val[0]= level; + + return ret; +} + +/** + * predicts the ac. 
+ * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir the ac prediction direction + */ +void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n, + int dir) +{ + int i; + int16_t *ac_val, *ac_val1; + int8_t * const qscale_table= s->current_picture.qscale_table; + + /* find prediction */ + ac_val = s->ac_val[0][0] + s->block_index[n] * 16; + ac_val1 = ac_val; + if (s->ac_pred) { + if (dir == 0) { + const int xy= s->mb_x-1 + s->mb_y*s->mb_stride; + /* left prediction */ + ac_val -= 16; + + if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){ + /* same qscale */ + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i<<3]] += ac_val[i]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); + } + } + } else { + const int xy= s->mb_x + s->mb_y*s->mb_stride - s->mb_stride; + /* top prediction */ + ac_val -= 16 * s->block_wrap[n]; + + if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){ + /* same qscale */ + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i]] += ac_val[i + 8]; + } + }else{ + /* different qscale, we must rescale */ + for(i=1;i<8;i++) { + block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); + } + } + } + } + /* left copy */ + for(i=1;i<8;i++) + ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]]; + + /* top copy */ + for(i=1;i<8;i++) + ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]]; + +} + +#ifdef CONFIG_ENCODERS + +/** + * encodes the dc value. 
+ * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) +{ +#if 1 +// if(level<-255 || level>255) printf("dc overflow\n"); + level+=256; + if (n < 4) { + /* luminance */ + put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); + } else { + /* chrominance */ + put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); + } +#else + int size, v; + /* find number of bits */ + size = 0; + v = abs(level); + while (v) { + v >>= 1; + size++; + } + + if (n < 4) { + /* luminance */ + put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); + } else { + /* chrominance */ + put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); + } + + /* encode remaining bits */ + if (size > 0) { + if (level < 0) + level = (-level) ^ ((1 << size) - 1); + put_bits(&s->pb, size, level); + if (size > 8) + put_bits(&s->pb, 1, 1); + } +#endif +} + +static inline int mpeg4_get_dc_length(int level, int n){ + if (n < 4) { + return uni_DCtab_lum_len[level + 256]; + } else { + return uni_DCtab_chrom_len[level + 256]; + } +} + +/** + * encodes a 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) +{ + int i, last_non_zero; +#if 0 //variables for the outcommented version + int code, sign, last; +#endif + const RLTable *rl; + uint32_t *bits_tab; + uint8_t *len_tab; + const int last_index = s->block_last_index[n]; + + if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away + /* mpeg4 based DC predictor */ + mpeg4_encode_dc(dc_pb, intra_dc, n); + if(last_index<1) return; + i = 1; + rl = &rl_intra; + bits_tab= uni_mpeg4_intra_rl_bits; + len_tab = uni_mpeg4_intra_rl_len; + } else { + if(last_index<0) return; + i = 0; + rl = &rl_inter; + bits_tab= uni_mpeg4_inter_rl_bits; + len_tab = uni_mpeg4_inter_rl_len; + 
} + + /* AC coefs */ + last_non_zero = i - 1; +#if 1 + for (; i < last_index; i++) { + int level = block[ scan_table[i] ]; + if (level) { + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(0, run, level); + put_bits(ac_pb, len_tab[index], bits_tab[index]); + }else{ //ESC3 + put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); + } + last_non_zero = i; + } + } + /*if(i<=last_index)*/{ + int level = block[ scan_table[i] ]; + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(1, run, level); + put_bits(ac_pb, len_tab[index], bits_tab[index]); + }else{ //ESC3 + put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); + } + } +#else + for (; i <= last_index; i++) { + const int slevel = block[ scan_table[i] ]; + if (slevel) { + int level; + int run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + level = slevel; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, last, run, level); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + int level1, run1; + level1 = level - rl->max_level[last][run]; + if (level1 < 1) + goto esc2; + code = get_rl_index(rl, last, run, level1); + if (code == rl->n) { + esc2: + put_bits(ac_pb, 1, 1); + if (level > MAX_LEVEL) + goto esc3; + run1 = run - rl->max_run[last][level] - 1; + if (run1 < 0) + goto esc3; + code = get_rl_index(rl, last, run1, level); + if (code == rl->n) { + esc3: + /* third escape */ + put_bits(ac_pb, 1, 1); + put_bits(ac_pb, 1, last); + put_bits(ac_pb, 6, run); + put_bits(ac_pb, 1, 1); + put_bits(ac_pb, 12, slevel & 0xfff); + put_bits(ac_pb, 1, 1); + } else { + /* second escape */ + put_bits(ac_pb, 1, 0); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(ac_pb, 1, sign); + } + } else { + /* first 
escape */ + put_bits(ac_pb, 1, 0); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(ac_pb, 1, sign); + } + } else { + put_bits(ac_pb, 1, sign); + } + last_non_zero = i; + } + } +#endif +} + +static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + uint8_t *scan_table) +{ + int i, last_non_zero; + const RLTable *rl; + uint8_t *len_tab; + const int last_index = s->block_last_index[n]; + int len=0; + + if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away + /* mpeg4 based DC predictor */ + len += mpeg4_get_dc_length(intra_dc, n); + if(last_index<1) return len; + i = 1; + rl = &rl_intra; + len_tab = uni_mpeg4_intra_rl_len; + } else { + if(last_index<0) return 0; + i = 0; + rl = &rl_inter; + len_tab = uni_mpeg4_inter_rl_len; + } + + /* AC coefs */ + last_non_zero = i - 1; + for (; i < last_index; i++) { + int level = block[ scan_table[i] ]; + if (level) { + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(0, run, level); + len += len_tab[index]; + }else{ //ESC3 + len += 7+2+1+6+1+12+1; + } + last_non_zero = i; + } + } + /*if(i<=last_index)*/{ + int level = block[ scan_table[i] ]; + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(1, run, level); + len += len_tab[index]; + }else{ //ESC3 + len += 7+2+1+6+1+12+1; + } + } + + return len; +} + +#endif + + +/***********************************************/ +/* decoding */ + +static VLC intra_MCBPC_vlc; +static VLC inter_MCBPC_vlc; +static VLC cbpy_vlc; +static VLC mv_vlc; +static VLC dc_lum, dc_chrom; +static VLC sprite_trajectory; +static VLC mb_type_b_vlc; +static VLC h263_mbtype_b_vlc; +static VLC cbpc_b_vlc; + +void init_vlc_rl(RLTable *rl, int use_static) +{ + int i, q; + + /* Return if static table is already initialized */ + if(use_static && rl->rl_vlc[0]) + return; + + init_vlc(&rl->vlc, 9, rl->n + 1, + 
&rl->table_vlc[0][1], 4, 2, + &rl->table_vlc[0][0], 4, 2, use_static); + + + for(q=0; q<32; q++){ + int qmul= q*2; + int qadd= (q-1)|1; + + if(q==0){ + qmul=1; + qadd=0; + } + if(use_static) + rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); + else + rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); + for(i=0; ivlc.table_size; i++){ + int code= rl->vlc.table[i][0]; + int len = rl->vlc.table[i][1]; + int level, run; + + if(len==0){ // illegal code + run= 66; + level= MAX_LEVEL; + }else if(len<0){ //more bits needed + run= 0; + level= code; + }else{ + if(code==rl->n){ //esc + run= 66; + level= 0; + }else{ + run= rl->table_run [code] + 1; + level= rl->table_level[code] * qmul + qadd; + if(code >= rl->last) run+=192; + } + } + rl->rl_vlc[q][i].len= len; + rl->rl_vlc[q][i].level= level; + rl->rl_vlc[q][i].run= run; + } + } +} + +/* init vlcs */ + +/* XXX: find a better solution to handle static init */ +void h263_decode_init_vlc(MpegEncContext *s) +{ + static int done = 0; + + if (!done) { + done = 1; + + init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9, + intra_MCBPC_bits, 1, 1, + intra_MCBPC_code, 1, 1, 1); + init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28, + inter_MCBPC_bits, 1, 1, + inter_MCBPC_code, 1, 1, 1); + init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16, + &cbpy_tab[0][1], 2, 1, + &cbpy_tab[0][0], 2, 1, 1); + init_vlc(&mv_vlc, MV_VLC_BITS, 33, + &mvtab[0][1], 2, 1, + &mvtab[0][0], 2, 1, 1); + init_rl(&rl_inter, 1); + init_rl(&rl_intra, 1); + init_rl(&rvlc_rl_inter, 1); + init_rl(&rvlc_rl_intra, 1); + init_rl(&rl_intra_aic, 1); + init_vlc_rl(&rl_inter, 1); + init_vlc_rl(&rl_intra, 1); + init_vlc_rl(&rvlc_rl_inter, 1); + init_vlc_rl(&rvlc_rl_intra, 1); + init_vlc_rl(&rl_intra_aic, 1); + init_vlc(&dc_lum, DC_VLC_BITS, 10 /* 13 */, + &DCtab_lum[0][1], 2, 1, + &DCtab_lum[0][0], 2, 1, 1); + init_vlc(&dc_chrom, DC_VLC_BITS, 10 /* 13 */, + &DCtab_chrom[0][1], 2, 1, + &DCtab_chrom[0][0], 2, 1, 1); + init_vlc(&sprite_trajectory, 
SPRITE_TRAJ_VLC_BITS, 15, + &sprite_trajectory_tab[0][1], 4, 2, + &sprite_trajectory_tab[0][0], 4, 2, 1); + init_vlc(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4, + &mb_type_b_tab[0][1], 2, 1, + &mb_type_b_tab[0][0], 2, 1, 1); + init_vlc(&h263_mbtype_b_vlc, H263_MBTYPE_B_VLC_BITS, 15, + &h263_mbtype_b_tab[0][1], 2, 1, + &h263_mbtype_b_tab[0][0], 2, 1, 1); + init_vlc(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4, + &cbpc_b_tab[0][1], 2, 1, + &cbpc_b_tab[0][0], 2, 1, 1); + } +} + +/** + * Get the GOB height based on picture height. + */ +int ff_h263_get_gob_height(MpegEncContext *s){ + if (s->height <= 400) + return 1; + else if (s->height <= 800) + return 2; + else + return 4; +} + +int ff_h263_decode_mba(MpegEncContext *s) +{ + int i, mb_pos; + + for(i=0; i<6; i++){ + if(s->mb_num-1 <= ff_mba_max[i]) break; + } + mb_pos= get_bits(&s->gb, ff_mba_length[i]); + s->mb_x= mb_pos % s->mb_width; + s->mb_y= mb_pos / s->mb_width; + + return mb_pos; +} + +void ff_h263_encode_mba(MpegEncContext *s) +{ + int i, mb_pos; + + for(i=0; i<6; i++){ + if(s->mb_num-1 <= ff_mba_max[i]) break; + } + mb_pos= s->mb_x + s->mb_width*s->mb_y; + put_bits(&s->pb, ff_mba_length[i], mb_pos); +} + +/** + * decodes the group of blocks header or slice header. 
+ * @return <0 if an error occured + */ +static int h263_decode_gob_header(MpegEncContext *s) +{ + unsigned int val, gfid, gob_number; + int left; + + /* Check for GOB Start Code */ + val = show_bits(&s->gb, 16); + if(val) + return -1; + + /* We have a GBSC probably with GSTUFF */ + skip_bits(&s->gb, 16); /* Drop the zeros */ + left= s->gb.size_in_bits - get_bits_count(&s->gb); + //MN: we must check the bits left or we might end in a infinite loop (or segfault) + for(;left>13; left--){ + if(get_bits1(&s->gb)) break; /* Seek the '1' bit */ + } + if(left<=13) + return -1; + + if(s->h263_slice_structured){ + if(get_bits1(&s->gb)==0) + return -1; + + ff_h263_decode_mba(s); + + if(s->mb_num > 1583) + if(get_bits1(&s->gb)==0) + return -1; + + s->qscale = get_bits(&s->gb, 5); /* SQUANT */ + if(get_bits1(&s->gb)==0) + return -1; + gfid = get_bits(&s->gb, 2); /* GFID */ + }else{ + gob_number = get_bits(&s->gb, 5); /* GN */ + s->mb_x= 0; + s->mb_y= s->gob_index* gob_number; + gfid = get_bits(&s->gb, 2); /* GFID */ + s->qscale = get_bits(&s->gb, 5); /* GQUANT */ + } + + if(s->mb_y >= s->mb_height) + return -1; + + if(s->qscale==0) + return -1; + + return 0; +} + +static inline void memsetw(short *tab, int val, int n) +{ + int i; + for(i=0;ipb); + uint8_t *end= s->pb.buf_end; + int size= end - start; + int pb_size = (((long)start + size/3)&(~3)) - (long)start; + int tex_size= (size - 2*pb_size)&(~3); + + set_put_bits_buffer_size(&s->pb, pb_size); + init_put_bits(&s->tex_pb, start + pb_size , tex_size); + init_put_bits(&s->pb2 , start + pb_size + tex_size, pb_size); +} + +void ff_mpeg4_merge_partitions(MpegEncContext *s) +{ + const int pb2_len = put_bits_count(&s->pb2 ); + const int tex_pb_len= put_bits_count(&s->tex_pb); + const int bits= put_bits_count(&s->pb); + + if(s->pict_type==I_TYPE){ + put_bits(&s->pb, 19, DC_MARKER); + s->misc_bits+=19 + pb2_len + bits - s->last_bits; + s->i_tex_bits+= tex_pb_len; + }else{ + put_bits(&s->pb, 17, MOTION_MARKER); + s->misc_bits+=17 + 
pb2_len; + s->mv_bits+= bits - s->last_bits; + s->p_tex_bits+= tex_pb_len; + } + + flush_put_bits(&s->pb2); + flush_put_bits(&s->tex_pb); + + set_put_bits_buffer_size(&s->pb, s->pb2.buf_end - s->pb.buf); + ff_copy_bits(&s->pb, s->pb2.buf , pb2_len); + ff_copy_bits(&s->pb, s->tex_pb.buf, tex_pb_len); + s->last_bits= put_bits_count(&s->pb); +} + +#endif //CONFIG_ENCODERS + +int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s){ + switch(s->pict_type){ + case I_TYPE: + return 16; + case P_TYPE: + case S_TYPE: + return s->f_code+15; + case B_TYPE: + return FFMAX(FFMAX(s->f_code, s->b_code)+15, 17); + default: + return -1; + } +} + +#ifdef CONFIG_ENCODERS + +void ff_mpeg4_encode_video_packet_header(MpegEncContext *s) +{ + int mb_num_bits= av_log2(s->mb_num - 1) + 1; + + put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0); + put_bits(&s->pb, 1, 1); + + put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width); + put_bits(&s->pb, s->quant_precision, s->qscale); + put_bits(&s->pb, 1, 0); /* no HEC */ +} + +#endif //CONFIG_ENCODERS + +/** + * check if the next stuff is a resync marker or the end. + * @return 0 if not + */ +static inline int mpeg4_is_resync(MpegEncContext *s){ + const int bits_count= get_bits_count(&s->gb); + + if(s->workaround_bugs&FF_BUG_NO_PADDING){ + return 0; + } + + if(bits_count + 8 >= s->gb.size_in_bits){ + int v= show_bits(&s->gb, 8); + v|= 0x7F >> (7-(bits_count&7)); + + if(v==0x7F) + return 1; + }else{ + if(show_bits(&s->gb, 16) == ff_mpeg4_resync_prefix[bits_count&7]){ + int len; + GetBitContext gb= s->gb; + + skip_bits(&s->gb, 1); + align_get_bits(&s->gb); + + for(len=0; len<32; len++){ + if(get_bits1(&s->gb)) break; + } + + s->gb= gb; + + if(len>=ff_mpeg4_get_video_packet_prefix_length(s)) + return 1; + } + } + return 0; +} + +/** + * decodes the next video packet. 
+ * @return <0 if something went wrong + */ +static int mpeg4_decode_video_packet_header(MpegEncContext *s) +{ + int mb_num_bits= av_log2(s->mb_num - 1) + 1; + int header_extension=0, mb_num, len; + + /* is there enough space left for a video packet + header */ + if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1; + + for(len=0; len<32; len++){ + if(get_bits1(&s->gb)) break; + } + + if(len!=ff_mpeg4_get_video_packet_prefix_length(s)){ + av_log(s->avctx, AV_LOG_ERROR, "marker does not match f_code\n"); + return -1; + } + + if(s->shape != RECT_SHAPE){ + header_extension= get_bits1(&s->gb); + //FIXME more stuff here + } + + mb_num= get_bits(&s->gb, mb_num_bits); + if(mb_num>=s->mb_num){ + av_log(s->avctx, AV_LOG_ERROR, "illegal mb_num in video packet (%d %d) \n", mb_num, s->mb_num); + return -1; + } + if(s->pict_type == B_TYPE){ + while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++; + if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which where allready decoded + } + + s->mb_x= mb_num % s->mb_width; + s->mb_y= mb_num / s->mb_width; + + if(s->shape != BIN_ONLY_SHAPE){ + int qscale= get_bits(&s->gb, s->quant_precision); + if(qscale) + s->chroma_qscale=s->qscale= qscale; + } + + if(s->shape == RECT_SHAPE){ + header_extension= get_bits1(&s->gb); + } + if(header_extension){ + int time_increment; + int time_incr=0; + + while (get_bits1(&s->gb) != 0) + time_incr++; + + check_marker(&s->gb, "before time_increment in video packed header"); + time_increment= get_bits(&s->gb, s->time_increment_bits); + check_marker(&s->gb, "before vop_coding_type in video packed header"); + + skip_bits(&s->gb, 2); /* vop coding type */ + //FIXME not rect stuff here + + if(s->shape != BIN_ONLY_SHAPE){ + skip_bits(&s->gb, 3); /* intra dc vlc threshold */ +//FIXME don't just ignore everything + if(s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ + mpeg4_decode_sprite_trajectory(s, &s->gb); + av_log(s->avctx, AV_LOG_ERROR, 
"untested\n"); + } + + //FIXME reduced res stuff here + + if (s->pict_type != I_TYPE) { + int f_code = get_bits(&s->gb, 3); /* fcode_for */ + if(f_code==0){ + av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n"); + } + } + if (s->pict_type == B_TYPE) { + int b_code = get_bits(&s->gb, 3); + if(b_code==0){ + av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (b_code=0)\n"); + } + } + } + } + //FIXME new-pred stuff + +//printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb)); + + return 0; +} + +void ff_mpeg4_clean_buffers(MpegEncContext *s) +{ + int c_wrap, c_xy, l_wrap, l_xy; + + l_wrap= s->b8_stride; + l_xy= (2*s->mb_y-1)*l_wrap + s->mb_x*2 - 1; + c_wrap= s->mb_stride; + c_xy= (s->mb_y-1)*c_wrap + s->mb_x - 1; + +#if 0 + /* clean DC */ + memsetw(s->dc_val[0] + l_xy, 1024, l_wrap*2+1); + memsetw(s->dc_val[1] + c_xy, 1024, c_wrap+1); + memsetw(s->dc_val[2] + c_xy, 1024, c_wrap+1); +#endif + + /* clean AC */ + memset(s->ac_val[0] + l_xy, 0, (l_wrap*2+1)*16*sizeof(int16_t)); + memset(s->ac_val[1] + c_xy, 0, (c_wrap +1)*16*sizeof(int16_t)); + memset(s->ac_val[2] + c_xy, 0, (c_wrap +1)*16*sizeof(int16_t)); + + /* clean MV */ + // we can't clear the MVs as they might be needed by a b frame +// memset(s->motion_val + l_xy, 0, (l_wrap*2+1)*2*sizeof(int16_t)); +// memset(s->motion_val, 0, 2*sizeof(int16_t)*(2 + s->mb_width*2)*(2 + s->mb_height*2)); + s->last_mv[0][0][0]= + s->last_mv[0][0][1]= + s->last_mv[1][0][0]= + s->last_mv[1][0][1]= 0; +} + +/** + * decodes the group of blocks / video packet header. 
+ * @return <0 if no resync found + */ +int ff_h263_resync(MpegEncContext *s){ + int left, ret; + + if(s->codec_id==CODEC_ID_MPEG4){ + skip_bits1(&s->gb); + align_get_bits(&s->gb); + } + + if(show_bits(&s->gb, 16)==0){ + if(s->codec_id==CODEC_ID_MPEG4) + ret= mpeg4_decode_video_packet_header(s); + else + ret= h263_decode_gob_header(s); + if(ret>=0) + return 0; + } + //ok, it's not where its supposed to be ... + s->gb= s->last_resync_gb; + align_get_bits(&s->gb); + left= s->gb.size_in_bits - get_bits_count(&s->gb); + + for(;left>16+1+5+5; left-=8){ + if(show_bits(&s->gb, 16)==0){ + GetBitContext bak= s->gb; + + if(s->codec_id==CODEC_ID_MPEG4) + ret= mpeg4_decode_video_packet_header(s); + else + ret= h263_decode_gob_header(s); + if(ret>=0) + return 0; + + s->gb= bak; + } + skip_bits(&s->gb, 8); + } + + return -1; +} + +/** + * gets the average motion vector for a GMC MB. + * @param n either 0 for the x component or 1 for y + * @returns the average MV for a GMC MB + */ +static inline int get_amv(MpegEncContext *s, int n){ + int x, y, mb_v, sum, dx, dy, shift; + int len = 1 << (s->f_code + 4); + const int a= s->sprite_warping_accuracy; + + if(s->workaround_bugs & FF_BUG_AMV) + len >>= s->quarter_sample; + + if(s->real_sprite_warping_points==1){ + if(s->divx_version==500 && s->divx_build==413) + sum= s->sprite_offset[0][n] / (1<<(a - s->quarter_sample)); + else + sum= RSHIFT(s->sprite_offset[0][n]<quarter_sample, a); + }else{ + dx= s->sprite_delta[n][0]; + dy= s->sprite_delta[n][1]; + shift= s->sprite_shift[0]; + if(n) dy -= 1<<(shift + a + 1); + else dx -= 1<<(shift + a + 1); + mb_v= s->sprite_offset[0][n] + dx*s->mb_x*16 + dy*s->mb_y*16; + + sum=0; + for(y=0; y<16; y++){ + int v; + + v= mb_v + dy*y; + //XXX FIXME optimize + for(x=0; x<16; x++){ + sum+= v>>shift; + v+= dx; + } + } + sum= RSHIFT(sum, a+8-s->quarter_sample); + } + + if (sum < -len) sum= -len; + else if (sum >= len) sum= len-1; + + return sum; +} + +/** + * decodes first partition. 
+ * @return number of MBs decoded or <0 if an error occured + */ +static int mpeg4_decode_partition_a(MpegEncContext *s){ + int mb_num; + static const int8_t quant_tab[4] = { -1, -2, 1, 2 }; + + /* decode first partition */ + mb_num=0; + s->first_slice_line=1; + for(; s->mb_ymb_height; s->mb_y++){ + ff_init_block_index(s); + for(; s->mb_xmb_width; s->mb_x++){ + const int xy= s->mb_x + s->mb_y*s->mb_stride; + int cbpc; + int dir=0; + + mb_num++; + ff_update_block_index(s); + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1) + s->first_slice_line=0; + + if(s->pict_type==I_TYPE){ + int i; + + do{ + if(show_bits_long(&s->gb, 19)==DC_MARKER){ + return mb_num-1; + } + + cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }while(cbpc == 8); + + s->cbp_table[xy]= cbpc & 3; + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + s->mb_intra = 1; + + if(cbpc & 4) { + ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); + } + s->current_picture.qscale_table[xy]= s->qscale; + + s->mbintra_table[xy]= 1; + for(i=0; i<6; i++){ + int dc_pred_dir; + int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); + if(dc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + dir<<=1; + if(dc_pred_dir) dir|=1; + } + s->pred_dir_table[xy]= dir; + }else{ /* P/S_TYPE */ + int mx, my, pred_x, pred_y, bits; + int16_t * const mot_val= s->current_picture.motion_val[0][s->block_index[0]]; + const int stride= s->b8_stride*2; + +try_again: + bits= show_bits(&s->gb, 17); + if(bits==MOTION_MARKER){ + return mb_num-1; + } + skip_bits1(&s->gb); + if(bits&0x10000){ + /* skip mb */ + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_GMC | MB_TYPE_L0; + mx= get_amv(s, 0); + my= get_amv(s, 1); + }else{ + s->current_picture.mb_type[xy]= 
MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + mx=my=0; + } + mot_val[0 ]= mot_val[2 ]= + mot_val[0+stride]= mot_val[2+stride]= mx; + mot_val[1 ]= mot_val[3 ]= + mot_val[1+stride]= mot_val[3+stride]= my; + + if(s->mbintra_table[xy]) + ff_clean_intra_table_entries(s); + continue; + } + + cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + if(cbpc == 20) + goto try_again; + + s->cbp_table[xy]= cbpc&(8+3); //8 is dquant + + s->mb_intra = ((cbpc & 4) != 0); + + if(s->mb_intra){ + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + s->mbintra_table[xy]= 1; + mot_val[0 ]= mot_val[2 ]= + mot_val[0+stride]= mot_val[2+stride]= 0; + mot_val[1 ]= mot_val[3 ]= + mot_val[1+stride]= mot_val[3+stride]= 0; + }else{ + if(s->mbintra_table[xy]) + ff_clean_intra_table_entries(s); + + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0) + s->mcsel= get_bits1(&s->gb); + else s->mcsel= 0; + + if ((cbpc & 16) == 0) { + /* 16x16 motion prediction */ + + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + if(!s->mcsel){ + mx = h263_decode_motion(s, pred_x, s->f_code); + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y, s->f_code); + if (my >= 0xffff) + return -1; + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + } else { + mx = get_amv(s, 0); + my = get_amv(s, 1); + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_GMC | MB_TYPE_L0; + } + + mot_val[0 ]= mot_val[2 ] = + mot_val[0+stride]= mot_val[2+stride]= mx; + mot_val[1 ]= mot_val[3 ]= + mot_val[1+stride]= mot_val[3+stride]= my; + } else { + int i; + s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; + for(i=0;i<4;i++) { + int16_t *mot_val= h263_pred_motion(s, i, 0, &pred_x, &pred_y); + mx = h263_decode_motion(s, pred_x, s->f_code); + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y, s->f_code); + if (my >= 0xffff) + 
return -1; + mot_val[0] = mx; + mot_val[1] = my; + } + } + } + } + } + s->mb_x= 0; + } + + return mb_num; +} + +/** + * decode second partition. + * @return <0 if an error occured + */ +static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){ + int mb_num=0; + static const int8_t quant_tab[4] = { -1, -2, 1, 2 }; + + s->mb_x= s->resync_mb_x; + s->first_slice_line=1; + for(s->mb_y= s->resync_mb_y; mb_num < mb_count; s->mb_y++){ + ff_init_block_index(s); + for(; mb_num < mb_count && s->mb_xmb_width; s->mb_x++){ + const int xy= s->mb_x + s->mb_y*s->mb_stride; + + mb_num++; + ff_update_block_index(s); + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1) + s->first_slice_line=0; + + if(s->pict_type==I_TYPE){ + int ac_pred= get_bits1(&s->gb); + int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + s->cbp_table[xy]|= cbpy<<2; + s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; + }else{ /* P || S_TYPE */ + if(IS_INTRA(s->current_picture.mb_type[xy])){ + int dir=0,i; + int ac_pred = get_bits1(&s->gb); + int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + if(s->cbp_table[xy] & 8) { + ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); + } + s->current_picture.qscale_table[xy]= s->qscale; + + for(i=0; i<6; i++){ + int dc_pred_dir; + int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); + if(dc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + dir<<=1; + if(dc_pred_dir) dir|=1; + } + s->cbp_table[xy]&= 3; //remove dquant + s->cbp_table[xy]|= cbpy<<2; + s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; + s->pred_dir_table[xy]= dir; + }else if(IS_SKIP(s->current_picture.mb_type[xy])){ + s->current_picture.qscale_table[xy]= s->qscale; + 
s->cbp_table[xy]= 0; + }else{ + int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + if(s->cbp_table[xy] & 8) { + ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); + } + s->current_picture.qscale_table[xy]= s->qscale; + + s->cbp_table[xy]&= 3; //remove dquant + s->cbp_table[xy]|= (cbpy^0xf)<<2; + } + } + } + if(mb_num >= mb_count) return 0; + s->mb_x= 0; + } + return 0; +} + +/** + * decodes the first & second partition + * @return <0 if error (and sets error type in the error_status_table) + */ +int ff_mpeg4_decode_partitions(MpegEncContext *s) +{ + int mb_num; + const int part_a_error= s->pict_type==I_TYPE ? (DC_ERROR|MV_ERROR) : MV_ERROR; + const int part_a_end = s->pict_type==I_TYPE ? (DC_END |MV_END) : MV_END; + + mb_num= mpeg4_decode_partition_a(s); + if(mb_num<0){ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error); + return -1; + } + + if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){ + av_log(s->avctx, AV_LOG_ERROR, "slice below monitor ...\n"); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error); + return -1; + } + + s->mb_num_left= mb_num; + + if(s->pict_type==I_TYPE){ + while(show_bits(&s->gb, 9) == 1) + skip_bits(&s->gb, 9); + if(get_bits_long(&s->gb, 19)!=DC_MARKER){ + av_log(s->avctx, AV_LOG_ERROR, "marker missing after first I partition at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }else{ + while(show_bits(&s->gb, 10) == 1) + skip_bits(&s->gb, 10); + if(get_bits(&s->gb, 17)!=MOTION_MARKER){ + av_log(s->avctx, AV_LOG_ERROR, "marker missing after first P partition at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + } + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, part_a_end); + + if( mpeg4_decode_partition_b(s, mb_num) < 0){ + if(s->pict_type==P_TYPE) + ff_er_add_slice(s, s->resync_mb_x, 
s->resync_mb_y, s->mb_x, s->mb_y, DC_ERROR); + return -1; + }else{ + if(s->pict_type==P_TYPE) + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, DC_END); + } + + return 0; +} + +/** + * decode partition C of one MB. + * @return <0 if an error occured + */ +static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + int cbp, mb_type; + const int xy= s->mb_x + s->mb_y*s->mb_stride; + + mb_type= s->current_picture.mb_type[xy]; + cbp = s->cbp_table[xy]; + + if(s->current_picture.qscale_table[xy] != s->qscale){ + ff_set_qscale(s, s->current_picture.qscale_table[xy] ); + } + + if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) { + int i; + for(i=0; i<4; i++){ + s->mv[0][i][0] = s->current_picture.motion_val[0][ s->block_index[i] ][0]; + s->mv[0][i][1] = s->current_picture.motion_val[0][ s->block_index[i] ][1]; + } + s->mb_intra = IS_INTRA(mb_type); + + if (IS_SKIP(mb_type)) { + /* skip mb */ + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ + s->mcsel=1; + s->mb_skipped = 0; + }else{ + s->mcsel=0; + s->mb_skipped = 1; + } + }else if(s->mb_intra){ + s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]); + }else if(!s->mb_intra){ +// s->mcsel= 0; //FIXME do we need to init that + + s->mv_dir = MV_DIR_FORWARD; + if (IS_8X8(mb_type)) { + s->mv_type = MV_TYPE_8X8; + } else { + s->mv_type = MV_TYPE_16X16; + } + } + } else { /* I-Frame */ + s->mb_intra = 1; + s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]); + } + + if (!IS_SKIP(mb_type)) { + int i; + s->dsp.clear_blocks(s->block[0]); + /* decode each block */ + for (i = 0; i < 6; i++) { + if(mpeg4_decode_block(s, block[i], i, cbp&32, s->mb_intra, s->rvlc) < 0){ + av_log(s->avctx, AV_LOG_ERROR, "texture corrupted at %d %d %d\n", s->mb_x, s->mb_y, s->mb_intra); + return -1; + } + cbp+=cbp; + } + } + + /* per-MB end of slice check */ + + 
if(--s->mb_num_left <= 0){ +//printf("%06X %d\n", show_bits(&s->gb, 24), s->gb.size_in_bits - get_bits_count(&s->gb)); + if(mpeg4_is_resync(s)) + return SLICE_END; + else + return SLICE_NOEND; + }else{ + if(mpeg4_is_resync(s)){ + const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1; + if(s->cbp_table[xy+delta]) + return SLICE_END; + } + return SLICE_OK; + } +} + +/** + * read the next MVs for OBMC. yes this is a ugly hack, feel free to send a patch :) + */ +static void preview_obmc(MpegEncContext *s){ + GetBitContext gb= s->gb; + + int cbpc, i, pred_x, pred_y, mx, my; + int16_t *mot_val; + const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride; + const int stride= s->b8_stride*2; + + for(i=0; i<4; i++) + s->block_index[i]+= 2; + for(i=4; i<6; i++) + s->block_index[i]+= 1; + s->mb_x++; + + assert(s->pict_type == P_TYPE); + + do{ + if (get_bits1(&s->gb)) { + /* skip mb */ + mot_val = s->current_picture.motion_val[0][ s->block_index[0] ]; + mot_val[0 ]= mot_val[2 ]= + mot_val[0+stride]= mot_val[2+stride]= 0; + mot_val[1 ]= mot_val[3 ]= + mot_val[1+stride]= mot_val[3+stride]= 0; + + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + goto end; + } + cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); + }while(cbpc == 20); + + if(cbpc & 4){ + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + }else{ + get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + if (cbpc & 8) { + if(s->modified_quant){ + if(get_bits1(&s->gb)) skip_bits(&s->gb, 1); + else skip_bits(&s->gb, 5); + }else + skip_bits(&s->gb, 2); + } + + if ((cbpc & 16) == 0) { + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + /* 16x16 motion prediction */ + mot_val= h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + if (s->umvplus) + mx = h263p_decode_umotion(s, pred_x); + else + mx = h263_decode_motion(s, pred_x, 1); + + if (s->umvplus) + my = h263p_decode_umotion(s, pred_y); + else + my = h263_decode_motion(s, pred_y, 1); + + mot_val[0 ]= mot_val[2 ]= + 
mot_val[0+stride]= mot_val[2+stride]= mx; + mot_val[1 ]= mot_val[3 ]= + mot_val[1+stride]= mot_val[3+stride]= my; + } else { + s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; + for(i=0;i<4;i++) { + mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y); + if (s->umvplus) + mx = h263p_decode_umotion(s, pred_x); + else + mx = h263_decode_motion(s, pred_x, 1); + + if (s->umvplus) + my = h263p_decode_umotion(s, pred_y); + else + my = h263_decode_motion(s, pred_y, 1); + if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1) + skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */ + mot_val[0] = mx; + mot_val[1] = my; + } + } + } +end: + + for(i=0; i<4; i++) + s->block_index[i]-= 2; + for(i=4; i<6; i++) + s->block_index[i]-= 1; + s->mb_x--; + + s->gb= gb; +} + +static void h263_decode_dquant(MpegEncContext *s){ + static const int8_t quant_tab[4] = { -1, -2, 1, 2 }; + + if(s->modified_quant){ + if(get_bits1(&s->gb)) + s->qscale= modified_quant_tab[get_bits1(&s->gb)][ s->qscale ]; + else + s->qscale= get_bits(&s->gb, 5); + }else + s->qscale += quant_tab[get_bits(&s->gb, 2)]; + ff_set_qscale(s, s->qscale); +} + +int ff_h263_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant; + int16_t *mot_val; + const int xy= s->mb_x + s->mb_y * s->mb_stride; + + assert(!s->h263_pred); + + if (s->pict_type == P_TYPE) { + do{ + if (get_bits1(&s->gb)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skipped = !(s->obmc | s->loop_filter); + goto end; + } + cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); + //fprintf(stderr, "\tCBPC: %d", cbpc); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }while(cbpc == 
20); + + s->dsp.clear_blocks(s->block[0]); + + dquant = cbpc & 8; + s->mb_intra = ((cbpc & 4) != 0); + if (s->mb_intra) goto intra; + + cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + + if(s->alt_inter_vlc==0 || (cbpc & 3)!=3) + cbpy ^= 0xF; + + cbp = (cbpc & 3) | (cbpy << 2); + if (dquant) { + h263_decode_dquant(s); + } + + s->mv_dir = MV_DIR_FORWARD; + if ((cbpc & 16) == 0) { + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + /* 16x16 motion prediction */ + s->mv_type = MV_TYPE_16X16; + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + if (s->umvplus) + mx = h263p_decode_umotion(s, pred_x); + else + mx = h263_decode_motion(s, pred_x, 1); + + if (mx >= 0xffff) + return -1; + + if (s->umvplus) + my = h263p_decode_umotion(s, pred_y); + else + my = h263_decode_motion(s, pred_y, 1); + + if (my >= 0xffff) + return -1; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + + if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1) + skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */ + } else { + s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; + s->mv_type = MV_TYPE_8X8; + for(i=0;i<4;i++) { + mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y); + if (s->umvplus) + mx = h263p_decode_umotion(s, pred_x); + else + mx = h263_decode_motion(s, pred_x, 1); + if (mx >= 0xffff) + return -1; + + if (s->umvplus) + my = h263p_decode_umotion(s, pred_y); + else + my = h263_decode_motion(s, pred_y, 1); + if (my >= 0xffff) + return -1; + s->mv[0][i][0] = mx; + s->mv[0][i][1] = my; + if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1) + skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */ + mot_val[0] = mx; + mot_val[1] = my; + } + } + + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } + + if(s->obmc){ + if(s->pict_type == P_TYPE && s->mb_x+1mb_width && s->mb_num_left != 1) + preview_obmc(s); + } + } else if(s->pict_type==B_TYPE) { + int mb_type; + const int 
stride= s->b8_stride; + int16_t *mot_val0 = s->current_picture.motion_val[0][ 2*(s->mb_x + s->mb_y*stride) ]; + int16_t *mot_val1 = s->current_picture.motion_val[1][ 2*(s->mb_x + s->mb_y*stride) ]; +// const int mv_xy= s->mb_x + 1 + s->mb_y * s->mb_stride; + + //FIXME ugly + mot_val0[0 ]= mot_val0[2 ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]= + mot_val0[1 ]= mot_val0[3 ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]= + mot_val1[0 ]= mot_val1[2 ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]= + mot_val1[1 ]= mot_val1[3 ]= mot_val1[1+2*stride]= mot_val1[3+2*stride]= 0; + + do{ + mb_type= get_vlc2(&s->gb, h263_mbtype_b_vlc.table, H263_MBTYPE_B_VLC_BITS, 2); + if (mb_type < 0){ + av_log(s->avctx, AV_LOG_ERROR, "b mb_type damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + mb_type= h263_mb_type_b_map[ mb_type ]; + }while(!mb_type); + + s->mb_intra = IS_INTRA(mb_type); + if(HAS_CBP(mb_type)){ + s->dsp.clear_blocks(s->block[0]); + cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1); + if(s->mb_intra){ + dquant = IS_QUANT(mb_type); + goto intra; + } + + cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + + if (cbpy < 0){ + av_log(s->avctx, AV_LOG_ERROR, "b cbpy damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + if(s->alt_inter_vlc==0 || (cbpc & 3)!=3) + cbpy ^= 0xF; + + cbp = (cbpc & 3) | (cbpy << 2); + }else + cbp=0; + + assert(!s->mb_intra); + + if(IS_QUANT(mb_type)){ + h263_decode_dquant(s); + } + + if(IS_DIRECT(mb_type)){ + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + mb_type |= ff_mpeg4_set_direct_mv(s, 0, 0); + }else{ + s->mv_dir = 0; + s->mv_type= MV_TYPE_16X16; +//FIXME UMV + + if(USES_LIST(mb_type, 0)){ + int16_t *mot_val= h263_pred_motion(s, 0, 0, &mx, &my); + s->mv_dir = MV_DIR_FORWARD; + + mx = h263_decode_motion(s, mx, 1); + my = h263_decode_motion(s, my, 1); + + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + mot_val[0 ]= mot_val[2 ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx; + mot_val[1 ]= mot_val[3 ]= 
mot_val[1+2*stride]= mot_val[3+2*stride]= my; + } + + if(USES_LIST(mb_type, 1)){ + int16_t *mot_val= h263_pred_motion(s, 0, 1, &mx, &my); + s->mv_dir |= MV_DIR_BACKWARD; + + mx = h263_decode_motion(s, mx, 1); + my = h263_decode_motion(s, my, 1); + + s->mv[1][0][0] = mx; + s->mv[1][0][1] = my; + mot_val[0 ]= mot_val[2 ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx; + mot_val[1 ]= mot_val[3 ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my; + } + } + + s->current_picture.mb_type[xy]= mb_type; + + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } + } else { /* I-Frame */ + do{ + cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "I cbpc damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }while(cbpc == 8); + + s->dsp.clear_blocks(s->block[0]); + + dquant = cbpc & 4; + s->mb_intra = 1; +intra: + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + if (s->h263_aic) { + s->ac_pred = get_bits1(&s->gb); + if(s->ac_pred){ + s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED; + + s->h263_aic_dir = get_bits1(&s->gb); + } + }else + s->ac_pred = 0; + + cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + cbp = (cbpc & 3) | (cbpy << 2); + if (dquant) { + h263_decode_dquant(s); + } + + /* decode each block */ + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, cbp&32) < 0) + return -1; + cbp+=cbp; + } + } +end: + + /* per-MB end of slice check */ + { + int v= show_bits(&s->gb, 16); + + if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){ + v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits; + } + + if(v==0) + return SLICE_END; + } + + return SLICE_OK; +} + +int ff_mpeg4_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, 
dquant; + int16_t *mot_val; + static int8_t quant_tab[4] = { -1, -2, 1, 2 }; + const int xy= s->mb_x + s->mb_y * s->mb_stride; + + assert(s->h263_pred); + + if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) { + do{ + if (get_bits1(&s->gb)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){ + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_GMC | MB_TYPE_16x16 | MB_TYPE_L0; + s->mcsel=1; + s->mv[0][0][0]= get_amv(s, 0); + s->mv[0][0][1]= get_amv(s, 1); + + s->mb_skipped = 0; + }else{ + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + s->mcsel=0; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skipped = 1; + } + goto end; + } + cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2); + //fprintf(stderr, "\tCBPC: %d", cbpc); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }while(cbpc == 20); + + s->dsp.clear_blocks(s->block[0]); + dquant = cbpc & 8; + s->mb_intra = ((cbpc & 4) != 0); + if (s->mb_intra) goto intra; + + if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0) + s->mcsel= get_bits1(&s->gb); + else s->mcsel= 0; + cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1) ^ 0x0F; + + cbp = (cbpc & 3) | (cbpy << 2); + if (dquant) { + ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); + } + if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE))) + s->interlaced_dct= get_bits1(&s->gb); + + s->mv_dir = MV_DIR_FORWARD; + if ((cbpc & 16) == 0) { + if(s->mcsel){ + s->current_picture.mb_type[xy]= MB_TYPE_GMC | MB_TYPE_16x16 | MB_TYPE_L0; + /* 16x16 global motion prediction */ + s->mv_type = MV_TYPE_16X16; + mx= get_amv(s, 0); + my= get_amv(s, 1); + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + }else if((!s->progressive_sequence) 
&& get_bits1(&s->gb)){ + s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED; + /* 16x8 field motion prediction */ + s->mv_type= MV_TYPE_FIELD; + + s->field_select[0][0]= get_bits1(&s->gb); + s->field_select[0][1]= get_bits1(&s->gb); + + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + + for(i=0; i<2; i++){ + mx = h263_decode_motion(s, pred_x, s->f_code); + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y/2, s->f_code); + if (my >= 0xffff) + return -1; + + s->mv[0][i][0] = mx; + s->mv[0][i][1] = my; + } + }else{ + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + /* 16x16 motion prediction */ + s->mv_type = MV_TYPE_16X16; + h263_pred_motion(s, 0, 0, &pred_x, &pred_y); + mx = h263_decode_motion(s, pred_x, s->f_code); + + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y, s->f_code); + + if (my >= 0xffff) + return -1; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + } + } else { + s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; + s->mv_type = MV_TYPE_8X8; + for(i=0;i<4;i++) { + mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y); + mx = h263_decode_motion(s, pred_x, s->f_code); + if (mx >= 0xffff) + return -1; + + my = h263_decode_motion(s, pred_y, s->f_code); + if (my >= 0xffff) + return -1; + s->mv[0][i][0] = mx; + s->mv[0][i][1] = my; + mot_val[0] = mx; + mot_val[1] = my; + } + } + } else if(s->pict_type==B_TYPE) { + int modb1; // first bit of modb + int modb2; // second bit of modb + int mb_type; + + s->mb_intra = 0; //B-frames never contain intra blocks + s->mcsel=0; // ... 
true gmc blocks + + if(s->mb_x==0){ + for(i=0; i<2; i++){ + s->last_mv[i][0][0]= + s->last_mv[i][0][1]= + s->last_mv[i][1][0]= + s->last_mv[i][1][1]= 0; + } + } + + /* if we skipped it in the future P Frame than skip it now too */ + s->mb_skipped= s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]; // Note, skiptab=0 if last was GMC + + if(s->mb_skipped){ + /* skip mb */ + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mv[1][0][0] = 0; + s->mv[1][0][1] = 0; + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + goto end; + } + + modb1= get_bits1(&s->gb); + if(modb1){ + mb_type= MB_TYPE_DIRECT2 | MB_TYPE_SKIP | MB_TYPE_L0L1; //like MB_TYPE_B_DIRECT but no vectors coded + cbp=0; + }else{ + modb2= get_bits1(&s->gb); + mb_type= get_vlc2(&s->gb, mb_type_b_vlc.table, MB_TYPE_B_VLC_BITS, 1); + if(mb_type<0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal MB_type\n"); + return -1; + } + mb_type= mb_type_b_map[ mb_type ]; + if(modb2) cbp= 0; + else{ + s->dsp.clear_blocks(s->block[0]); + cbp= get_bits(&s->gb, 6); + } + + if ((!IS_DIRECT(mb_type)) && cbp) { + if(get_bits1(&s->gb)){ + ff_set_qscale(s, s->qscale + get_bits1(&s->gb)*4 - 2); + } + } + + if(!s->progressive_sequence){ + if(cbp) + s->interlaced_dct= get_bits1(&s->gb); + + if(!IS_DIRECT(mb_type) && get_bits1(&s->gb)){ + mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED; + mb_type &= ~MB_TYPE_16x16; + + if(USES_LIST(mb_type, 0)){ + s->field_select[0][0]= get_bits1(&s->gb); + s->field_select[0][1]= get_bits1(&s->gb); + } + if(USES_LIST(mb_type, 1)){ + s->field_select[1][0]= get_bits1(&s->gb); + s->field_select[1][1]= get_bits1(&s->gb); + } + } + } + + s->mv_dir = 0; + if((mb_type & (MB_TYPE_DIRECT2|MB_TYPE_INTERLACED)) == 0){ + s->mv_type= MV_TYPE_16X16; + + if(USES_LIST(mb_type, 0)){ + s->mv_dir = MV_DIR_FORWARD; + + mx = h263_decode_motion(s, s->last_mv[0][0][0], s->f_code); + my = 
h263_decode_motion(s, s->last_mv[0][0][1], s->f_code); + s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx; + s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my; + } + + if(USES_LIST(mb_type, 1)){ + s->mv_dir |= MV_DIR_BACKWARD; + + mx = h263_decode_motion(s, s->last_mv[1][0][0], s->b_code); + my = h263_decode_motion(s, s->last_mv[1][0][1], s->b_code); + s->last_mv[1][1][0]= s->last_mv[1][0][0]= s->mv[1][0][0] = mx; + s->last_mv[1][1][1]= s->last_mv[1][0][1]= s->mv[1][0][1] = my; + } + }else if(!IS_DIRECT(mb_type)){ + s->mv_type= MV_TYPE_FIELD; + + if(USES_LIST(mb_type, 0)){ + s->mv_dir = MV_DIR_FORWARD; + + for(i=0; i<2; i++){ + mx = h263_decode_motion(s, s->last_mv[0][i][0] , s->f_code); + my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code); + s->last_mv[0][i][0]= s->mv[0][i][0] = mx; + s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2; + } + } + + if(USES_LIST(mb_type, 1)){ + s->mv_dir |= MV_DIR_BACKWARD; + + for(i=0; i<2; i++){ + mx = h263_decode_motion(s, s->last_mv[1][i][0] , s->b_code); + my = h263_decode_motion(s, s->last_mv[1][i][1]/2, s->b_code); + s->last_mv[1][i][0]= s->mv[1][i][0] = mx; + s->last_mv[1][i][1]= (s->mv[1][i][1] = my)*2; + } + } + } + } + + if(IS_DIRECT(mb_type)){ + if(IS_SKIP(mb_type)) + mx=my=0; + else{ + mx = h263_decode_motion(s, 0, 1); + my = h263_decode_motion(s, 0, 1); + } + + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; + mb_type |= ff_mpeg4_set_direct_mv(s, mx, my); + } + s->current_picture.mb_type[xy]= mb_type; + } else { /* I-Frame */ + do{ + cbpc = get_vlc2(&s->gb, intra_MCBPC_vlc.table, INTRA_MCBPC_VLC_BITS, 2); + if (cbpc < 0){ + av_log(s->avctx, AV_LOG_ERROR, "I cbpc damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + }while(cbpc == 8); + + dquant = cbpc & 4; + s->mb_intra = 1; +intra: + s->ac_pred = get_bits1(&s->gb); + if(s->ac_pred) + s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED; + else + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + + cbpy = 
get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + cbp = (cbpc & 3) | (cbpy << 2); + if (dquant) { + ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); + } + + if(!s->progressive_sequence) + s->interlaced_dct= get_bits1(&s->gb); + + s->dsp.clear_blocks(s->block[0]); + /* decode each block */ + for (i = 0; i < 6; i++) { + if (mpeg4_decode_block(s, block[i], i, cbp&32, 1, 0) < 0) + return -1; + cbp+=cbp; + } + goto end; + } + + /* decode each block */ + for (i = 0; i < 6; i++) { + if (mpeg4_decode_block(s, block[i], i, cbp&32, 0, 0) < 0) + return -1; + cbp+=cbp; + } +end: + + /* per-MB end of slice check */ + if(s->codec_id==CODEC_ID_MPEG4){ + if(mpeg4_is_resync(s)){ + const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1; + if(s->pict_type==B_TYPE && s->next_picture.mbskip_table[xy + delta]) + return SLICE_OK; + return SLICE_END; + } + } + + return SLICE_OK; +} + +static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) +{ + int code, val, sign, shift, l; + code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2); + + if (code == 0) + return pred; + if (code < 0) + return 0xffff; + + sign = get_bits1(&s->gb); + shift = f_code - 1; + val = code; + if (shift) { + val = (val - 1) << shift; + val |= get_bits(&s->gb, shift); + val++; + } + if (sign) + val = -val; + val += pred; + + /* modulo decoding */ + if (!s->h263_long_vectors) { + l = INT_BIT - 5 - f_code; + val = (val<>l; + } else { + /* horrible h263 long vector mode */ + if (pred < -31 && val < -63) + val += 64; + if (pred > 32 && val > 63) + val -= 64; + + } + return val; +} + +/* Decodes RVLC of H.263+ UMV */ +static int h263p_decode_umotion(MpegEncContext * s, int pred) +{ + int code = 0, sign; + + if (get_bits1(&s->gb)) /* Motion difference = 0 */ + return pred; + + code = 2 + get_bits1(&s->gb); + + while (get_bits1(&s->gb)) + { + code <<= 1; + code += get_bits1(&s->gb); + } 
+ sign = code & 1; + code >>= 1; + + code = (sign) ? (pred - code) : (pred + code); +#ifdef DEBUG + av_log( s->avctx, AV_LOG_DEBUG,"H.263+ UMV Motion = %d\n", code); +#endif + return code; + +} + +static int h263_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded) +{ + int code, level, i, j, last, run; + RLTable *rl = &rl_inter; + const uint8_t *scan_table; + GetBitContext gb= s->gb; + + scan_table = s->intra_scantable.permutated; + if (s->h263_aic && s->mb_intra) { + rl = &rl_intra_aic; + i = 0; + if (s->ac_pred) { + if (s->h263_aic_dir) + scan_table = s->intra_v_scantable.permutated; /* left */ + else + scan_table = s->intra_h_scantable.permutated; /* top */ + } + } else if (s->mb_intra) { + /* DC coef */ + if(s->codec_id == CODEC_ID_RV10){ +#ifdef CONFIG_RV10_DECODER + if (s->rv10_version == 3 && s->pict_type == I_TYPE) { + int component, diff; + component = (n <= 3 ? 0 : n - 4 + 1); + level = s->last_dc[component]; + if (s->rv10_first_dc_coded[component]) { + diff = rv_decode_dc(s, n); + if (diff == 0xffff) + return -1; + level += diff; + level = level & 0xff; /* handle wrap round */ + s->last_dc[component] = level; + } else { + s->rv10_first_dc_coded[component] = 1; + } + } else { + level = get_bits(&s->gb, 8); + if (level == 255) + level = 128; + } +#endif + }else{ + level = get_bits(&s->gb, 8); + if((level&0x7F) == 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y); + if(s->error_resilience >= FF_ER_COMPLIANT) + return -1; + } + if (level == 255) + level = 128; + } + block[0] = level; + i = 1; + } else { + i = 0; + } + if (!coded) { + if (s->mb_intra && s->h263_aic) + goto not_coded; + s->block_last_index[n] = i - 1; + return 0; + } +retry: + for(;;) { + code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2); + if (code < 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + if (code == rl->n) { + /* escape */ + if (s->h263_flv > 1) { + int is11 = 
get_bits1(&s->gb); + last = get_bits1(&s->gb); + run = get_bits(&s->gb, 6); + if(is11){ + level = get_sbits(&s->gb, 11); + } else { + level = get_sbits(&s->gb, 7); + } + } else { + last = get_bits1(&s->gb); + run = get_bits(&s->gb, 6); + level = (int8_t)get_bits(&s->gb, 8); + if(level == -128){ + if (s->codec_id == CODEC_ID_RV10) { + /* XXX: should patch encoder too */ + level = get_sbits(&s->gb, 12); + }else{ + level = get_bits(&s->gb, 5); + level |= get_sbits(&s->gb, 6)<<5; + } + } + } + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + if (get_bits1(&s->gb)) + level = -level; + } + i += run; + if (i >= 64){ + if(s->alt_inter_vlc && rl == &rl_inter && !s->mb_intra){ + //looks like a hack but no, it's the way its supposed to work ... + rl = &rl_intra_aic; + i = 0; + s->gb= gb; + memset(block, 0, sizeof(DCTELEM)*64); + goto retry; + } + av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); + return -1; + } + j = scan_table[i]; + block[j] = level; + if (last) + break; + i++; + } +not_coded: + if (s->mb_intra && s->h263_aic) { + h263_pred_acdc(s, block, n); + i = 63; + } + s->block_last_index[n] = i; + return 0; +} + +/** + * decodes the dc value. 
+ * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir_ptr the prediction direction will be stored here + * @return the quantized dc + */ +static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) +{ + int level, code; + + if (n < 4) + code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1); + else + code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1); + if (code < 0 || code > 9 /* && s->nbit<9 */){ + av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n"); + return -1; + } + if (code == 0) { + level = 0; + } else { + if(IS_3IV1){ + if(code==1) + level= 2*get_bits1(&s->gb)-1; + else{ + if(get_bits1(&s->gb)) + level = get_bits(&s->gb, code-1) + (1<<(code-1)); + else + level = -get_bits(&s->gb, code-1) - (1<<(code-1)); + } + }else{ + level = get_xbits(&s->gb, code); + } + + if (code > 8){ + if(get_bits1(&s->gb)==0){ /* marker */ + if(s->error_resilience>=2){ + av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n"); + return -1; + } + } + } + } + + return ff_mpeg4_pred_dc(s, n, level, dir_ptr, 0); +} + +/** + * decodes a block. 
+ * @return <0 if an error occured + */ +static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded, int intra, int rvlc) +{ + int level, i, last, run; + int dc_pred_dir; + RLTable * rl; + RL_VLC_ELEM * rl_vlc; + const uint8_t * scan_table; + int qmul, qadd; + + //Note intra & rvlc should be optimized away if this is inlined + + if(intra) { + if(s->qscale < s->intra_dc_threshold){ + /* DC coef */ + if(s->partitioned_frame){ + level = s->dc_val[0][ s->block_index[n] ]; + if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); + else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); + dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<ac_pred) { + if (dc_pred_dir == 0) + scan_table = s->intra_v_scantable.permutated; /* left */ + else + scan_table = s->intra_h_scantable.permutated; /* top */ + } else { + scan_table = s->intra_scantable.permutated; + } + qmul=1; + qadd=0; + } else { + i = -1; + if (!coded) { + s->block_last_index[n] = i; + return 0; + } + if(rvlc) rl = &rvlc_rl_inter; + else rl = &rl_inter; + + scan_table = s->intra_scantable.permutated; + + if(s->mpeg_quant){ + qmul=1; + qadd=0; + if(rvlc){ + rl_vlc = rvlc_rl_inter.rl_vlc[0]; + }else{ + rl_vlc = rl_inter.rl_vlc[0]; + } + }else{ + qmul = s->qscale << 1; + qadd = (s->qscale - 1) | 1; + if(rvlc){ + rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale]; + }else{ + rl_vlc = rl_inter.rl_vlc[s->qscale]; + } + } + } + { + OPEN_READER(re, &s->gb); + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0); + if (level==0) { + /* escape */ + if(rvlc){ + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "1. 
marker bit missing in rvlc esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); + run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); + SKIP_COUNTER(re, &s->gb, 1+1+6); + UPDATE_CACHE(re, &s->gb); + + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11); + + if(SHOW_UBITS(re, &s->gb, 5)!=0x10){ + av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 5); + + level= level * qmul + qadd; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_CACHE(re, &s->gb, 1); + SKIP_COUNTER(re, &s->gb, 1+11+5+1); + + i+= run + 1; + if(last) i+=192; + }else{ + int cache; + cache= GET_CACHE(re, &s->gb); + + if(IS_3IV1) + cache ^= 0xC0000000; + + if (cache&0x80000000) { + if (cache&0x40000000) { + /* third escape */ + SKIP_CACHE(re, &s->gb, 2); + last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); + run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); + SKIP_COUNTER(re, &s->gb, 2+1+6); + UPDATE_CACHE(re, &s->gb); + + if(IS_3IV1){ + level= SHOW_SBITS(re, &s->gb, 12); LAST_SKIP_BITS(re, &s->gb, 12); + }else{ + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in 3. esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12); + + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in 3. 
esc\n"); + return -1; + }; LAST_SKIP_CACHE(re, &s->gb, 1); + + SKIP_COUNTER(re, &s->gb, 1+12+1); + } + +#if 0 + if(s->error_resilience >= FF_ER_COMPLIANT){ + const int abs_level= ABS(level); + if(abs_level<=MAX_LEVEL && run<=MAX_RUN){ + const int run1= run - rl->max_run[last][abs_level] - 1; + if(abs_level <= rl->max_level[last][run]){ + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n"); + return -1; + } + if(s->error_resilience > FF_ER_COMPLIANT){ + if(abs_level <= rl->max_level[last][run]*2){ + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n"); + return -1; + } + if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){ + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n"); + return -1; + } + } + } + } +#endif + if (level>0) level= level * qmul + qadd; + else level= level * qmul - qadd; + + if((unsigned)(level + 2048) > 4095){ + if(s->error_resilience > FF_ER_COMPLIANT){ + if(level > 2560 || level<-2560){ + av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale); + return -1; + } + } + level= level<0 ? 
-2048 : 2047; + } + + i+= run + 1; + if(last) i+=192; + } else { + /* second escape */ +#if MIN_CACHE_BITS < 20 + LAST_SKIP_BITS(re, &s->gb, 2); + UPDATE_CACHE(re, &s->gb); +#else + SKIP_BITS(re, &s->gb, 2); +#endif + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); + i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + } else { + /* first escape */ +#if MIN_CACHE_BITS < 19 + LAST_SKIP_BITS(re, &s->gb, 1); + UPDATE_CACHE(re, &s->gb); +#else + SKIP_BITS(re, &s->gb, 1); +#endif + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); + i+= run; + level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + } + } else { + i+= run; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + if (i > 62){ + i-= 192; + if(i&(~63)){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[scan_table[i]] = level; + break; + } + + block[scan_table[i]] = level; + } + CLOSE_READER(re, &s->gb); + } + not_coded: + if (intra) { + if(s->qscale >= s->intra_dc_threshold){ + block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0); + + i -= i>>31; //if(i == -1) i=0; + } + + mpeg4_pred_ac(s, block, n, dc_pred_dir); + if (s->ac_pred) { + i = 63; /* XXX: not optimal */ + } + } + s->block_last_index[n] = i; + return 0; +} + +/* most is hardcoded. 
should extend to handle all h263 streams */ +int h263_decode_picture_header(MpegEncContext *s) +{ + int format, width, height, i; + uint32_t startcode; + + align_get_bits(&s->gb); + + startcode= get_bits(&s->gb, 22-8); + + for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=8) { + startcode = ((startcode << 8) | get_bits(&s->gb, 8)) & 0x003FFFFF; + + if(startcode == 0x20) + break; + } + + if (startcode != 0x20) { + av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n"); + return -1; + } + /* temporal reference */ + i = get_bits(&s->gb, 8); /* picture timestamp */ + if( (s->picture_number&~0xFF)+i < s->picture_number) + i+= 256; + s->current_picture_ptr->pts= + s->picture_number= (s->picture_number&~0xFF) + i; + + /* PTYPE starts here */ + if (get_bits1(&s->gb) != 1) { + /* marker */ + av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n"); + return -1; + } + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); + return -1; /* h263 id */ + } + skip_bits1(&s->gb); /* split screen off */ + skip_bits1(&s->gb); /* camera off */ + skip_bits1(&s->gb); /* freeze picture release off */ + + format = get_bits(&s->gb, 3); + /* + 0 forbidden + 1 sub-QCIF + 10 QCIF + 7 extended PTYPE (PLUSPTYPE) + */ + + if (format != 7 && format != 6) { + s->h263_plus = 0; + /* H.263v1 */ + width = h263_format[format][0]; + height = h263_format[format][1]; + if (!width) + return -1; + + s->pict_type = I_TYPE + get_bits1(&s->gb); + + s->h263_long_vectors = get_bits1(&s->gb); + + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n"); + return -1; /* SAC: off */ + } + s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */ + s->unrestricted_mv = s->h263_long_vectors || s->obmc; + + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n"); + return -1; /* not PB frame */ + } + s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); + skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: 
off */ + + s->width = width; + s->height = height; + s->avctx->sample_aspect_ratio= (AVRational){12,11}; + s->avctx->time_base= (AVRational){1001, 30000}; + } else { + int ufep; + + /* H.263v2 */ + s->h263_plus = 1; + ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */ + + /* ufep other than 0 and 1 are reserved */ + if (ufep == 1) { + /* OPPTYPE */ + format = get_bits(&s->gb, 3); + dprintf("ufep=1, format: %d\n", format); + s->custom_pcf= get_bits1(&s->gb); + s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */ + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Syntax-based Arithmetic Coding (SAC) not supported\n"); + } + s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */ + s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */ + s->loop_filter= get_bits1(&s->gb); + s->unrestricted_mv = s->umvplus || s->obmc || s->loop_filter; + + s->h263_slice_structured= get_bits1(&s->gb); + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Reference Picture Selection not supported\n"); + } + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Independent Segment Decoding not supported\n"); + } + s->alt_inter_vlc= get_bits1(&s->gb); + s->modified_quant= get_bits1(&s->gb); + if(s->modified_quant) + s->chroma_qscale_table= ff_h263_chroma_qscale_table; + + skip_bits(&s->gb, 1); /* Prevent start code emulation */ + + skip_bits(&s->gb, 3); /* Reserved */ + } else if (ufep != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Bad UFEP type (%d)\n", ufep); + return -1; + } + + /* MPPTYPE */ + s->pict_type = get_bits(&s->gb, 3); + switch(s->pict_type){ + case 0: s->pict_type= I_TYPE;break; + case 1: s->pict_type= P_TYPE;break; + case 3: s->pict_type= B_TYPE;break; + case 7: s->pict_type= I_TYPE;break; //ZYGO + default: + return -1; + } + skip_bits(&s->gb, 2); + s->no_rounding = get_bits1(&s->gb); + skip_bits(&s->gb, 4); + + /* Get the picture dimensions */ + if (ufep) { + if (format == 6) { + /* Custom Picture 
Format (CPFMT) */ + s->aspect_ratio_info = get_bits(&s->gb, 4); + dprintf("aspect: %d\n", s->aspect_ratio_info); + /* aspect ratios: + 0 - forbidden + 1 - 1:1 + 2 - 12:11 (CIF 4:3) + 3 - 10:11 (525-type 4:3) + 4 - 16:11 (CIF 16:9) + 5 - 40:33 (525-type 16:9) + 6-14 - reserved + */ + width = (get_bits(&s->gb, 9) + 1) * 4; + skip_bits1(&s->gb); + height = get_bits(&s->gb, 9) * 4; + dprintf("\nH.263+ Custom picture: %dx%d\n",width,height); + if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) { + /* aspected dimensions */ + s->avctx->sample_aspect_ratio.num= get_bits(&s->gb, 8); + s->avctx->sample_aspect_ratio.den= get_bits(&s->gb, 8); + }else{ + s->avctx->sample_aspect_ratio= pixel_aspect[s->aspect_ratio_info]; + } + } else { + width = h263_format[format][0]; + height = h263_format[format][1]; + s->avctx->sample_aspect_ratio= (AVRational){12,11}; + } + if ((width == 0) || (height == 0)) + return -1; + s->width = width; + s->height = height; + + if(s->custom_pcf){ + int gcd; + s->avctx->time_base.den= 1800000; + s->avctx->time_base.num= 1000 + get_bits1(&s->gb); + s->avctx->time_base.num*= get_bits(&s->gb, 7); + if(s->avctx->time_base.num == 0){ + av_log(s, AV_LOG_ERROR, "zero framerate\n"); + return -1; + } + gcd= ff_gcd(s->avctx->time_base.den, s->avctx->time_base.num); + s->avctx->time_base.den /= gcd; + s->avctx->time_base.num /= gcd; +// av_log(s->avctx, AV_LOG_DEBUG, "%d/%d\n", s->avctx->time_base.den, s->avctx->time_base.num); + }else{ + s->avctx->time_base= (AVRational){1001, 30000}; + } + } + + if(s->custom_pcf){ + skip_bits(&s->gb, 2); //extended Temporal reference + } + + if (ufep) { + if (s->umvplus) { + if(get_bits1(&s->gb)==0) /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ + skip_bits1(&s->gb); + } + if(s->h263_slice_structured){ + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "rectangular slices not supported\n"); + } + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "unordered slices not supported\n"); + } + } + 
} + + s->qscale = get_bits(&s->gb, 5); + } + + s->mb_width = (s->width + 15) / 16; + s->mb_height = (s->height + 15) / 16; + s->mb_num = s->mb_width * s->mb_height; + + /* PEI */ + while (get_bits1(&s->gb) != 0) { + skip_bits(&s->gb, 8); + } + + if(s->h263_slice_structured){ + if (get_bits1(&s->gb) != 1) { + av_log(s->avctx, AV_LOG_ERROR, "SEPB1 marker missing\n"); + return -1; + } + + ff_h263_decode_mba(s); + + if (get_bits1(&s->gb) != 1) { + av_log(s->avctx, AV_LOG_ERROR, "SEPB2 marker missing\n"); + return -1; + } + } + s->f_code = 1; + + if(s->h263_aic){ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_aic_dc_scale_table; + }else{ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + } + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n", + s->qscale, av_get_pict_type_char(s->pict_type), + s->gb.size_in_bits, 1-s->no_rounding, + s->obmc ? " AP" : "", + s->umvplus ? " UMV" : "", + s->h263_long_vectors ? " LONG" : "", + s->h263_plus ? " +" : "", + s->h263_aic ? " AIC" : "", + s->alt_inter_vlc ? " AIV" : "", + s->modified_quant ? " MQ" : "", + s->loop_filter ? " LOOP" : "", + s->h263_slice_structured ? 
" SS" : "", + s->avctx->time_base.den, s->avctx->time_base.num + ); + } +#if 1 + if (s->pict_type == I_TYPE && s->avctx->codec_tag == ff_get_fourcc("ZYGO")){ + int i,j; + for(i=0; i<85; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb)); + av_log(s->avctx, AV_LOG_DEBUG, "\n"); + for(i=0; i<13; i++){ + for(j=0; j<3; j++){ + int v= get_bits(&s->gb, 8); + v |= get_sbits(&s->gb, 8)<<8; + av_log(s->avctx, AV_LOG_DEBUG, " %5d", v); + } + av_log(s->avctx, AV_LOG_DEBUG, "\n"); + } + for(i=0; i<50; i++) av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb)); + } +#endif + + return 0; +} + +static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb) +{ + int i; + int a= 2<sprite_warping_accuracy; + int rho= 3-s->sprite_warping_accuracy; + int r=16/a; + const int vop_ref[4][2]= {{0,0}, {s->width,0}, {0, s->height}, {s->width, s->height}}; // only true for rectangle shapes + int d[4][2]={{0,0}, {0,0}, {0,0}, {0,0}}; + int sprite_ref[4][2]; + int virtual_ref[2][2]; + int w2, h2, w3, h3; + int alpha=0, beta=0; + int w= s->width; + int h= s->height; + int min_ab; + + for(i=0; inum_sprite_warping_points; i++){ + int length; + int x=0, y=0; + + length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3); + if(length){ + x= get_xbits(gb, length); + } + if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(gb); /* marker bit */ + + length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3); + if(length){ + y=get_xbits(gb, length); + } + skip_bits1(gb); /* marker bit */ +//printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy); + d[i][0]= x; + d[i][1]= y; + } + + while((1<divx_version==500 && s->divx_build==413){ + sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0]; + sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1]; + sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0]; + sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1]; + sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0]; + sprite_ref[2][1]= a*vop_ref[2][1] + 
d[0][1] + d[2][1]; + } else { + sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]); + sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]); + sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]); + sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]); + sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]); + sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]); + } +/* sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]); + sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */ + +// this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...) +// perhaps it should be reordered to be more readable ... +// the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides +// so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form + virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) + + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w); + virtual_ref[0][1]= 16*vop_ref[0][1] + + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w); + virtual_ref[1][0]= 16*vop_ref[0][0] + + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h); + virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) + + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h); + + switch(s->num_sprite_warping_points) + { + case 0: + s->sprite_offset[0][0]= 0; + s->sprite_offset[0][1]= 0; + s->sprite_offset[1][0]= 0; + s->sprite_offset[1][1]= 0; + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; + break; + case 1: //GMC only + s->sprite_offset[0][0]= sprite_ref[0][0] - 
a*vop_ref[0][0]; + s->sprite_offset[0][1]= sprite_ref[0][1] - a*vop_ref[0][1]; + s->sprite_offset[1][0]= ((sprite_ref[0][0]>>1)|(sprite_ref[0][0]&1)) - a*(vop_ref[0][0]/2); + s->sprite_offset[1][1]= ((sprite_ref[0][1]>>1)|(sprite_ref[0][1]&1)) - a*(vop_ref[0][1]/2); + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; + break; + case 2: + s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+rho)) + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][0]) + + ( r*sprite_ref[0][1] - virtual_ref[0][1])*(-vop_ref[0][1]) + + (1<<(alpha+rho-1)); + s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+rho)) + + (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-vop_ref[0][0]) + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-vop_ref[0][1]) + + (1<<(alpha+rho-1)); + s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1) + +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1) + +2*w2*r*sprite_ref[0][0] + - 16*w2 + + (1<<(alpha+rho+1))); + s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) + +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1) + +2*w2*r*sprite_ref[0][1] + - 16*w2 + + (1<<(alpha+rho+1))); + s->sprite_delta[0][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); + s->sprite_delta[0][1]= (+r*sprite_ref[0][1] - virtual_ref[0][1]); + s->sprite_delta[1][0]= (-r*sprite_ref[0][1] + virtual_ref[0][1]); + s->sprite_delta[1][1]= (-r*sprite_ref[0][0] + virtual_ref[0][0]); + + s->sprite_shift[0]= alpha+rho; + s->sprite_shift[1]= alpha+rho+2; + break; + case 3: + min_ab= FFMIN(alpha, beta); + w3= w2>>min_ab; + h3= h2>>min_ab; + s->sprite_offset[0][0]= (sprite_ref[0][0]<<(alpha+beta+rho-min_ab)) + + (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-vop_ref[0][0]) + + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-vop_ref[0][1]) + + (1<<(alpha+beta+rho-min_ab-1)); + 
s->sprite_offset[0][1]= (sprite_ref[0][1]<<(alpha+beta+rho-min_ab)) + + (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-vop_ref[0][0]) + + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-vop_ref[0][1]) + + (1<<(alpha+beta+rho-min_ab-1)); + s->sprite_offset[1][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3*(-2*vop_ref[0][0] + 1) + + (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3*(-2*vop_ref[0][1] + 1) + + 2*w2*h3*r*sprite_ref[0][0] + - 16*w2*h3 + + (1<<(alpha+beta+rho-min_ab+1)); + s->sprite_offset[1][1]= (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3*(-2*vop_ref[0][0] + 1) + + (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3*(-2*vop_ref[0][1] + 1) + + 2*w2*h3*r*sprite_ref[0][1] + - 16*w2*h3 + + (1<<(alpha+beta+rho-min_ab+1)); + s->sprite_delta[0][0]= (-r*sprite_ref[0][0] + virtual_ref[0][0])*h3; + s->sprite_delta[0][1]= (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3; + s->sprite_delta[1][0]= (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3; + s->sprite_delta[1][1]= (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3; + + s->sprite_shift[0]= alpha + beta + rho - min_ab; + s->sprite_shift[1]= alpha + beta + rho - min_ab + 2; + break; + } + /* try to simplify the situation */ + if( s->sprite_delta[0][0] == a<sprite_shift[0] + && s->sprite_delta[0][1] == 0 + && s->sprite_delta[1][0] == 0 + && s->sprite_delta[1][1] == a<sprite_shift[0]) + { + s->sprite_offset[0][0]>>=s->sprite_shift[0]; + s->sprite_offset[0][1]>>=s->sprite_shift[0]; + s->sprite_offset[1][0]>>=s->sprite_shift[1]; + s->sprite_offset[1][1]>>=s->sprite_shift[1]; + s->sprite_delta[0][0]= a; + s->sprite_delta[0][1]= 0; + s->sprite_delta[1][0]= 0; + s->sprite_delta[1][1]= a; + s->sprite_shift[0]= 0; + s->sprite_shift[1]= 0; + s->real_sprite_warping_points=1; + } + else{ + int shift_y= 16 - s->sprite_shift[0]; + int shift_c= 16 - s->sprite_shift[1]; +//printf("shifts %d %d\n", shift_y, shift_c); + for(i=0; i<2; i++){ + s->sprite_offset[0][i]<<= shift_y; + s->sprite_offset[1][i]<<= shift_c; + s->sprite_delta[0][i]<<= shift_y; + 
s->sprite_delta[1][i]<<= shift_y; + s->sprite_shift[i]= 16; + } + s->real_sprite_warping_points= s->num_sprite_warping_points; + } +#if 0 +printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n", + vop_ref[0][0], vop_ref[0][1], + vop_ref[1][0], vop_ref[1][1], + vop_ref[2][0], vop_ref[2][1], + sprite_ref[0][0], sprite_ref[0][1], + sprite_ref[1][0], sprite_ref[1][1], + sprite_ref[2][0], sprite_ref[2][1], + virtual_ref[0][0], virtual_ref[0][1], + virtual_ref[1][0], virtual_ref[1][1] + ); + +printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n", + s->sprite_offset[0][0], s->sprite_offset[0][1], + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + s->sprite_shift[0] + ); +#endif +} + +static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){ + int hours, minutes, seconds; + + hours= get_bits(gb, 5); + minutes= get_bits(gb, 6); + skip_bits1(gb); + seconds= get_bits(gb, 6); + + s->time_base= seconds + 60*(minutes + 60*hours); + + skip_bits1(gb); + skip_bits1(gb); + + return 0; +} + +static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ + int width, height, vo_ver_id; + + /* vol header */ + skip_bits(gb, 1); /* random access */ + s->vo_type= get_bits(gb, 8); + if (get_bits1(gb) != 0) { /* is_ol_id */ + vo_ver_id = get_bits(gb, 4); /* vo_ver_id */ + skip_bits(gb, 3); /* vo_priority */ + } else { + vo_ver_id = 1; + } +//printf("vo type:%d\n",s->vo_type); + s->aspect_ratio_info= get_bits(gb, 4); + if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){ + s->avctx->sample_aspect_ratio.num= get_bits(gb, 8); // par_width + s->avctx->sample_aspect_ratio.den= get_bits(gb, 8); // par_height + }else{ + s->avctx->sample_aspect_ratio= pixel_aspect[s->aspect_ratio_info]; + } + + if ((s->vol_control_parameters=get_bits1(gb))) { /* vol control parameter */ + int chroma_format= get_bits(gb, 2); + if(chroma_format!=1){ + av_log(s->avctx, AV_LOG_ERROR, "illegal chroma format\n"); + } + 
s->low_delay= get_bits1(gb); + if(get_bits1(gb)){ /* vbv parameters */ + get_bits(gb, 15); /* first_half_bitrate */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* latter_half_bitrate */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* first_half_vbv_buffer_size */ + skip_bits1(gb); /* marker */ + get_bits(gb, 3); /* latter_half_vbv_buffer_size */ + get_bits(gb, 11); /* first_half_vbv_occupancy */ + skip_bits1(gb); /* marker */ + get_bits(gb, 15); /* latter_half_vbv_occupancy */ + skip_bits1(gb); /* marker */ + } + }else{ + // set low delay flag only once the smartest? low delay detection won't be overriden + if(s->picture_number==0) + s->low_delay=0; + } + + s->shape = get_bits(gb, 2); /* vol shape */ + if(s->shape != RECT_SHAPE) av_log(s->avctx, AV_LOG_ERROR, "only rectangular vol supported\n"); + if(s->shape == GRAY_SHAPE && vo_ver_id != 1){ + av_log(s->avctx, AV_LOG_ERROR, "Gray shape not supported\n"); + skip_bits(gb, 4); //video_object_layer_shape_extension + } + + check_marker(gb, "before time_increment_resolution"); + + s->avctx->time_base.den = get_bits(gb, 16); + if(!s->avctx->time_base.den){ + av_log(s->avctx, AV_LOG_ERROR, "time_base.den==0\n"); + return -1; + } + + s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1; + if (s->time_increment_bits < 1) + s->time_increment_bits = 1; + + check_marker(gb, "before fixed_vop_rate"); + + if (get_bits1(gb) != 0) { /* fixed_vop_rate */ + s->avctx->time_base.num = get_bits(gb, s->time_increment_bits); + }else + s->avctx->time_base.num = 1; + + s->t_frame=0; + + if (s->shape != BIN_ONLY_SHAPE) { + if (s->shape == RECT_SHAPE) { + skip_bits1(gb); /* marker */ + width = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + height = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + if(width && height && !(s->width && s->avctx->codec_tag == ff_get_fourcc("MP4S"))){ /* they should be non zero but who knows ... 
*/ + s->width = width; + s->height = height; +// printf("width/height: %d %d\n", width, height); + } + } + + s->progressive_sequence= + s->progressive_frame= get_bits1(gb)^1; + s->interlaced_dct=0; + if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO)) + av_log(s->avctx, AV_LOG_INFO, "MPEG4 OBMC not supported (very likely buggy encoder)\n"); /* OBMC Disable */ + if (vo_ver_id == 1) { + s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */ + } else { + s->vol_sprite_usage = get_bits(gb, 2); /* vol_sprite_usage */ + } + if(s->vol_sprite_usage==STATIC_SPRITE) av_log(s->avctx, AV_LOG_ERROR, "Static Sprites not supported\n"); + if(s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE){ + if(s->vol_sprite_usage==STATIC_SPRITE){ + s->sprite_width = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_height= get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_left = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + s->sprite_top = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + } + s->num_sprite_warping_points= get_bits(gb, 6); + s->sprite_warping_accuracy = get_bits(gb, 2); + s->sprite_brightness_change= get_bits1(gb); + if(s->vol_sprite_usage==STATIC_SPRITE) + s->low_latency_sprite= get_bits1(gb); + } + // FIXME sadct disable bit if verid!=1 && shape not rect + + if (get_bits1(gb) == 1) { /* not_8_bit */ + s->quant_precision = get_bits(gb, 4); /* quant_precision */ + if(get_bits(gb, 4)!=8) av_log(s->avctx, AV_LOG_ERROR, "N-bit not supported\n"); /* bits_per_pixel */ + if(s->quant_precision!=5) av_log(s->avctx, AV_LOG_ERROR, "quant precision %d\n", s->quant_precision); + } else { + s->quant_precision = 5; + } + + // FIXME a bunch of grayscale shape things + + if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */ + int i, v; + + /* load default matrixes */ + for(i=0; i<64; i++){ + int j= s->dsp.idct_permutation[i]; + v= ff_mpeg4_default_intra_matrix[i]; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; + + v= 
ff_mpeg4_default_non_intra_matrix[i]; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; + } + + /* load custom intra matrix */ + if(get_bits1(gb)){ + int last=0; + for(i=0; i<64; i++){ + int j; + v= get_bits(gb, 8); + if(v==0) break; + + last= v; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; + } + + /* replicate last value */ + for(; i<64; i++){ + int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; + s->intra_matrix[j]= last; + s->chroma_intra_matrix[j]= last; + } + } + + /* load custom non intra matrix */ + if(get_bits1(gb)){ + int last=0; + for(i=0; i<64; i++){ + int j; + v= get_bits(gb, 8); + if(v==0) break; + + last= v; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; + } + + /* replicate last value */ + for(; i<64; i++){ + int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; + s->inter_matrix[j]= last; + s->chroma_inter_matrix[j]= last; + } + } + + // FIXME a bunch of grayscale shape things + } + + if(vo_ver_id != 1) + s->quarter_sample= get_bits1(gb); + else s->quarter_sample=0; + + if(!get_bits1(gb)) av_log(s->avctx, AV_LOG_ERROR, "Complexity estimation not supported\n"); + + s->resync_marker= !get_bits1(gb); /* resync_marker_disabled */ + + s->data_partitioning= get_bits1(gb); + if(s->data_partitioning){ + s->rvlc= get_bits1(gb); + } + + if(vo_ver_id != 1) { + s->new_pred= get_bits1(gb); + if(s->new_pred){ + av_log(s->avctx, AV_LOG_ERROR, "new pred not supported\n"); + skip_bits(gb, 2); /* requested upstream message type */ + skip_bits1(gb); /* newpred segment type */ + } + s->reduced_res_vop= get_bits1(gb); + if(s->reduced_res_vop) av_log(s->avctx, AV_LOG_ERROR, "reduced resolution VOP not supported\n"); + } + else{ + s->new_pred=0; + s->reduced_res_vop= 0; + } + + s->scalability= get_bits1(gb); + + if (s->scalability) { + GetBitContext bak= *gb; + int ref_layer_id; + int ref_layer_sampling_dir; + int h_sampling_factor_n; 
+ int h_sampling_factor_m; + int v_sampling_factor_n; + int v_sampling_factor_m; + + s->hierachy_type= get_bits1(gb); + ref_layer_id= get_bits(gb, 4); + ref_layer_sampling_dir= get_bits1(gb); + h_sampling_factor_n= get_bits(gb, 5); + h_sampling_factor_m= get_bits(gb, 5); + v_sampling_factor_n= get_bits(gb, 5); + v_sampling_factor_m= get_bits(gb, 5); + s->enhancement_type= get_bits1(gb); + + if( h_sampling_factor_n==0 || h_sampling_factor_m==0 + || v_sampling_factor_n==0 || v_sampling_factor_m==0){ + +// fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n"); + s->scalability=0; + + *gb= bak; + }else + av_log(s->avctx, AV_LOG_ERROR, "scalability not supported\n"); + + // bin shape stuff FIXME + } + } + return 0; +} + +/** + * decodes the user data stuff in the header. + * also inits divx/xvid/lavc_version/build + */ +static int decode_user_data(MpegEncContext *s, GetBitContext *gb){ + char buf[256]; + int i; + int e; + int ver = 0, build = 0, ver2 = 0, ver3 = 0; + char last; + + for(i=0; i<255 && gb->index < gb->size_in_bits; i++){ + if(show_bits(gb, 23) == 0) break; + buf[i]= get_bits(gb, 8); + } + buf[i]=0; + + /* divx detection */ + e=sscanf(buf, "DivX%dBuild%d%c", &ver, &build, &last); + if(e<2) + e=sscanf(buf, "DivX%db%d%c", &ver, &build, &last); + if(e>=2){ + s->divx_version= ver; + s->divx_build= build; + s->divx_packed= e==3 && last=='p'; + } + + /* ffmpeg detection */ + e=sscanf(buf, "FFmpe%*[^b]b%d", &build)+3; + if(e!=4) + e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); + if(e!=4){ + e=sscanf(buf, "Lavc%d.%d.%d", &ver, &ver2, &ver3)+1; + if (e>1) + build= (ver<<16) + (ver2<<8) + ver3; + } + if(e!=4){ + if(strcmp(buf, "ffmpeg")==0){ + s->lavc_build= 4600; + } + } + if(e==4){ + s->lavc_build= build; + } + + /* xvid detection */ + e=sscanf(buf, "XviD%d", &build); + if(e==1){ + s->xvid_build= build; + } + +//printf("User Data: %s\n", buf); + return 0; +} + +static int
decode_vop_header(MpegEncContext *s, GetBitContext *gb){ + int time_incr, time_increment; + + s->pict_type = get_bits(gb, 2) + I_TYPE; /* pict type: I = 0 , P = 1 */ + if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){ + av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n"); + s->low_delay=0; + } + + s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE; + if(s->partitioned_frame) + s->decode_mb= mpeg4_decode_partitioned_mb; + else + s->decode_mb= ff_mpeg4_decode_mb; + + time_incr=0; + while (get_bits1(gb) != 0) + time_incr++; + + check_marker(gb, "before time_increment"); + + if(s->time_increment_bits==0 || !(show_bits(gb, s->time_increment_bits+1)&1)){ + av_log(s->avctx, AV_LOG_ERROR, "hmm, seems the headers are not complete, trying to guess time_increment_bits\n"); + + for(s->time_increment_bits=1 ;s->time_increment_bits<16; s->time_increment_bits++){ + if(show_bits(gb, s->time_increment_bits+1)&1) break; + } + + av_log(s->avctx, AV_LOG_ERROR, "my guess is %d bits ;)\n",s->time_increment_bits); + } + + if(IS_3IV1) time_increment= get_bits1(gb); //FIXME investigate further + else time_increment= get_bits(gb, s->time_increment_bits); + +// printf("%d %X\n", s->time_increment_bits, time_increment); +//av_log(s->avctx, AV_LOG_DEBUG, " type:%d modulo_time_base:%d increment:%d t_frame %d\n", s->pict_type, time_incr, time_increment, s->t_frame); + if(s->pict_type!=B_TYPE){ + s->last_time_base= s->time_base; + s->time_base+= time_incr; + s->time= s->time_base*s->avctx->time_base.den + time_increment; + if(s->workaround_bugs&FF_BUG_UMP4){ + if(s->time < s->last_non_b_time){ +// fprintf(stderr, "header is not mpeg4 compatible, broken encoder, trying to workaround\n"); + s->time_base++; + s->time+= s->avctx->time_base.den; + } + } + s->pp_time= s->time - s->last_non_b_time; + s->last_non_b_time= s->time; + }else{ + s->time= (s->last_time_base + time_incr)*s->avctx->time_base.den 
+ time_increment; + s->pb_time= s->pp_time - (s->last_non_b_time - s->time); + if(s->pp_time <=s->pb_time || s->pp_time <= s->pp_time - s->pb_time || s->pp_time<=0){ +// printf("messed up order, maybe after seeking? skipping current b frame\n"); + return FRAME_SKIPPED; + } + ff_mpeg4_init_direct_mv(s); + + if(s->t_frame==0) s->t_frame= s->pb_time; + if(s->t_frame==0) s->t_frame=1; // 1/0 protection + s->pp_field_time= ( ROUNDED_DIV(s->last_non_b_time, s->t_frame) + - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2; + s->pb_field_time= ( ROUNDED_DIV(s->time, s->t_frame) + - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2; + } +//av_log(s->avctx, AV_LOG_DEBUG, "last nonb %Ld last_base %d time %Ld pp %d pb %d t %d ppf %d pbf %d\n", s->last_non_b_time, s->last_time_base, s->time, s->pp_time, s->pb_time, s->t_frame, s->pp_field_time, s->pb_field_time); + + if(s->avctx->time_base.num) + s->current_picture_ptr->pts= (s->time + s->avctx->time_base.num/2) / s->avctx->time_base.num; + else + s->current_picture_ptr->pts= AV_NOPTS_VALUE; + if(s->avctx->debug&FF_DEBUG_PTS) + av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n", s->current_picture_ptr->pts); + + check_marker(gb, "before vop_coded"); + + /* vop coded */ + if (get_bits1(gb) != 1){ + if(s->avctx->debug&FF_DEBUG_PICT_INFO) + av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n"); + return FRAME_SKIPPED; + } +//printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->avctx->time_base.den, s->time_base, +//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time); + if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE + || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) { + /* rounding type for motion estimation */ + s->no_rounding = get_bits1(gb); + } else { + s->no_rounding = 0; + } +//FIXME reduced res stuff + + if (s->shape != RECT_SHAPE) { + if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) { + int width, height, hor_spat_ref, ver_spat_ref; + + width 
= get_bits(gb, 13); + skip_bits1(gb); /* marker */ + height = get_bits(gb, 13); + skip_bits1(gb); /* marker */ + hor_spat_ref = get_bits(gb, 13); /* hor_spat_ref */ + skip_bits1(gb); /* marker */ + ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */ + } + skip_bits1(gb); /* change_CR_disable */ + + if (get_bits1(gb) != 0) { + skip_bits(gb, 8); /* constant_alpha_value */ + } + } +//FIXME complexity estimation stuff + + if (s->shape != BIN_ONLY_SHAPE) { + s->intra_dc_threshold= mpeg4_dc_threshold[ get_bits(gb, 3) ]; + if(!s->progressive_sequence){ + s->top_field_first= get_bits1(gb); + s->alternate_scan= get_bits1(gb); + }else + s->alternate_scan= 0; + } + + if(s->alternate_scan){ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_alternate_vertical_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_vertical_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); + } else{ + ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable , ff_zigzag_direct); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan); + ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan); + } + + if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){ + mpeg4_decode_sprite_trajectory(s, gb); + if(s->sprite_brightness_change) av_log(s->avctx, AV_LOG_ERROR, "sprite_brightness_change not supported\n"); + if(s->vol_sprite_usage==STATIC_SPRITE) av_log(s->avctx, AV_LOG_ERROR, "static sprite not supported\n"); + } + + if (s->shape != BIN_ONLY_SHAPE) { + s->chroma_qscale= s->qscale = get_bits(gb, s->quant_precision); + if(s->qscale==0){ + av_log(s->avctx, AV_LOG_ERROR, "Error, header 
damaged or not MPEG4 header (qscale=0)\n"); + return -1; // makes no sense to continue, as there is nothing left from the image then + } + + if (s->pict_type != I_TYPE) { + s->f_code = get_bits(gb, 3); /* fcode_for */ + if(s->f_code==0){ + av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n"); + return -1; // makes no sense to continue, as the MV decoding will break very quickly + } + }else + s->f_code=1; + + if (s->pict_type == B_TYPE) { + s->b_code = get_bits(gb, 3); + }else + s->b_code=1; + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n", + s->qscale, s->f_code, s->b_code, + s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), + gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first, + s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points, + s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? 
" VOLC" : " ", s->intra_dc_threshold); + } + + if(!s->scalability){ + if (s->shape!=RECT_SHAPE && s->pict_type!=I_TYPE) { + skip_bits1(gb); // vop shape coding type + } + }else{ + if(s->enhancement_type){ + int load_backward_shape= get_bits1(gb); + if(load_backward_shape){ + av_log(s->avctx, AV_LOG_ERROR, "load backward shape isn't supported\n"); + } + } + skip_bits(gb, 2); //ref_select_code + } + } + /* detect buggy encoders which don't set the low_delay flag (divx4/xvid/opendivx)*/ + // note we cannot detect divx5 without b-frames easily (although it's buggy too) + if(s->vo_type==0 && s->vol_control_parameters==0 && s->divx_version==0 && s->picture_number==0){ + av_log(s->avctx, AV_LOG_ERROR, "looks like this file was encoded with (divx4/(old)xvid/opendivx) -> forcing low_delay flag\n"); + s->low_delay=1; + } + + s->picture_number++; // better than pic number==0 always ;) + + s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support + s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table; + + if(s->workaround_bugs&FF_BUG_EDGE){ + s->h_edge_pos= s->width; + s->v_edge_pos= s->height; + } + return 0; +} + +/** + * decode mpeg4 headers + * @return <0 if no VOP found (or a damaged one) + * FRAME_SKIPPED if a not coded VOP is found + * 0 if a VOP is found + */ +int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb) +{ + int startcode, v; + + /* search next start code */ + align_get_bits(gb); + + if(s->avctx->codec_tag == ff_get_fourcc("WV1F") && show_bits(gb, 24) == 0x575630){ + skip_bits(gb, 24); + if(get_bits(gb, 8) == 0xF0) + return decode_vop_header(s, gb); + } + + startcode = 0xff; + for(;;) { + if(get_bits_count(gb) >= gb->size_in_bits){ + if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){ + av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits); + return FRAME_SKIPPED; //divx bug + }else + return -1; //end of stream + } + + /* use the bits after the test */ + v = get_bits(gb, 8); + startcode = 
((startcode << 8) | v) & 0xffffffff; + + if((startcode&0xFFFFFF00) != 0x100) + continue; //no startcode + + if(s->avctx->debug&FF_DEBUG_STARTCODE){ + av_log(s->avctx, AV_LOG_DEBUG, "startcode: %3X ", startcode); + if (startcode<=0x11F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Start"); + else if(startcode<=0x12F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Layer Start"); + else if(startcode<=0x13F) av_log(s->avctx, AV_LOG_DEBUG, "Reserved"); + else if(startcode<=0x15F) av_log(s->avctx, AV_LOG_DEBUG, "FGS bp start"); + else if(startcode<=0x1AF) av_log(s->avctx, AV_LOG_DEBUG, "Reserved"); + else if(startcode==0x1B0) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Seq Start"); + else if(startcode==0x1B1) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Seq End"); + else if(startcode==0x1B2) av_log(s->avctx, AV_LOG_DEBUG, "User Data"); + else if(startcode==0x1B3) av_log(s->avctx, AV_LOG_DEBUG, "Group of VOP start"); + else if(startcode==0x1B4) av_log(s->avctx, AV_LOG_DEBUG, "Video Session Error"); + else if(startcode==0x1B5) av_log(s->avctx, AV_LOG_DEBUG, "Visual Object Start"); + else if(startcode==0x1B6) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Plane start"); + else if(startcode==0x1B7) av_log(s->avctx, AV_LOG_DEBUG, "slice start"); + else if(startcode==0x1B8) av_log(s->avctx, AV_LOG_DEBUG, "extension start"); + else if(startcode==0x1B9) av_log(s->avctx, AV_LOG_DEBUG, "fgs start"); + else if(startcode==0x1BA) av_log(s->avctx, AV_LOG_DEBUG, "FBA Object start"); + else if(startcode==0x1BB) av_log(s->avctx, AV_LOG_DEBUG, "FBA Object Plane start"); + else if(startcode==0x1BC) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object start"); + else if(startcode==0x1BD) av_log(s->avctx, AV_LOG_DEBUG, "Mesh Object Plane start"); + else if(startcode==0x1BE) av_log(s->avctx, AV_LOG_DEBUG, "Still Texture Object start"); + else if(startcode==0x1BF) av_log(s->avctx, AV_LOG_DEBUG, "Texture Spatial Layer start"); + else if(startcode==0x1C0) av_log(s->avctx, AV_LOG_DEBUG, "Texture SNR Layer 
start"); + else if(startcode==0x1C1) av_log(s->avctx, AV_LOG_DEBUG, "Texture Tile start"); + else if(startcode==0x1C2) av_log(s->avctx, AV_LOG_DEBUG, "Texture Shape Layer start"); + else if(startcode==0x1C3) av_log(s->avctx, AV_LOG_DEBUG, "stuffing start"); + else if(startcode<=0x1C5) av_log(s->avctx, AV_LOG_DEBUG, "reserved"); + else if(startcode<=0x1FF) av_log(s->avctx, AV_LOG_DEBUG, "System start"); + av_log(s->avctx, AV_LOG_DEBUG, " at %d\n", get_bits_count(gb)); + } + + if(startcode >= 0x120 && startcode <= 0x12F){ + if(decode_vol_header(s, gb) < 0) + return -1; + } + else if(startcode == USER_DATA_STARTCODE){ + decode_user_data(s, gb); + } + else if(startcode == GOP_STARTCODE){ + mpeg4_decode_gop_header(s, gb); + } + else if(startcode == VOP_STARTCODE){ + return decode_vop_header(s, gb); + } + + align_get_bits(gb); + startcode = 0xff; + } +} + +/* don't understand why they choose a different header ! */ +int intel_h263_decode_picture_header(MpegEncContext *s) +{ + int format; + + /* picture header */ + if (get_bits_long(&s->gb, 22) != 0x20) { + av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n"); + return -1; + } + s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */ + + if (get_bits1(&s->gb) != 1) { + av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n"); + return -1; /* marker */ + } + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n"); + return -1; /* h263 id */ + } + skip_bits1(&s->gb); /* split screen off */ + skip_bits1(&s->gb); /* camera off */ + skip_bits1(&s->gb); /* freeze picture release off */ + + format = get_bits(&s->gb, 3); + if (format != 7) { + av_log(s->avctx, AV_LOG_ERROR, "Intel H263 free format not supported\n"); + return -1; + } + s->h263_plus = 0; + + s->pict_type = I_TYPE + get_bits1(&s->gb); + + s->unrestricted_mv = get_bits1(&s->gb); + s->h263_long_vectors = s->unrestricted_mv; + + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n"); + return -1; /* SAC: off 
*/ + } + if (get_bits1(&s->gb) != 0) { + s->obmc= 1; + av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n"); +// return -1; /* advanced prediction mode: off */ + } + if (get_bits1(&s->gb) != 0) { + av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n"); + return -1; /* PB frame mode */ + } + + /* skip unknown header garbage */ + skip_bits(&s->gb, 41); + + s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); + skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */ + + /* PEI */ + while (get_bits1(&s->gb) != 0) { + skip_bits(&s->gb, 8); + } + s->f_code = 1; + + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + + return 0; +} + +int flv_h263_decode_picture_header(MpegEncContext *s) +{ + int format, width, height; + + /* picture header */ + if (get_bits_long(&s->gb, 17) != 1) { + av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n"); + return -1; + } + format = get_bits(&s->gb, 5); + if (format != 0 && format != 1) { + av_log(s->avctx, AV_LOG_ERROR, "Bad picture format\n"); + return -1; + } + s->h263_flv = format+1; + s->picture_number = get_bits(&s->gb, 8); /* picture timestamp */ + format = get_bits(&s->gb, 3); + switch (format) { + case 0: + width = get_bits(&s->gb, 8); + height = get_bits(&s->gb, 8); + break; + case 1: + width = get_bits(&s->gb, 16); + height = get_bits(&s->gb, 16); + break; + case 2: + width = 352; + height = 288; + break; + case 3: + width = 176; + height = 144; + break; + case 4: + width = 128; + height = 96; + break; + case 5: + width = 320; + height = 240; + break; + case 6: + width = 160; + height = 120; + break; + default: + width = height = 0; + break; + } + if(avcodec_check_dimensions(s->avctx, width, height)) + return -1; + s->width = width; + s->height = height; + + s->pict_type = I_TYPE + get_bits(&s->gb, 2); + s->dropable= s->pict_type > P_TYPE; + if (s->dropable) + s->pict_type = P_TYPE; + + skip_bits1(&s->gb); /* deblocking flag */ + s->chroma_qscale= s->qscale = 
get_bits(&s->gb, 5); + + s->h263_plus = 0; + + s->unrestricted_mv = 1; + s->h263_long_vectors = 0; + + /* PEI */ + while (get_bits1(&s->gb) != 0) { + skip_bits(&s->gb, 8); + } + s->f_code = 1; + + if(s->avctx->debug & FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d num:%d\n", + s->dropable ? 'D' : av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number); + } + + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + + return 0; +} diff --git a/mpeg4/src/libavcodec/h263data.h b/mpeg4/src/libavcodec/h263data.h new file mode 100644 index 0000000000000000000000000000000000000000..01bcaedb41225c4e2ccd95a5166da12059780263 --- /dev/null +++ b/mpeg4/src/libavcodec/h263data.h @@ -0,0 +1,285 @@ +/** + * @file h263data.h + * H.263 tables. + */ + + +/* intra MCBPC, mb_type = (intra), then (intraq) */ +const uint8_t intra_MCBPC_code[9] = { 1, 1, 2, 3, 1, 1, 2, 3, 1 }; +const uint8_t intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 }; + +/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */ +/* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */ +const uint8_t inter_MCBPC_code[28] = { + 1, 3, 2, 5, + 3, 4, 3, 3, + 3, 7, 6, 5, + 4, 4, 3, 2, + 2, 5, 4, 5, + 1, 0, 0, 0, /* Stuffing */ + 2, 12, 14, 15, +}; +const uint8_t inter_MCBPC_bits[28] = { + 1, 4, 4, 6, /* inter */ + 5, 8, 8, 7, /* intra */ + 3, 7, 7, 9, /* interQ */ + 6, 9, 9, 9, /* intraQ */ + 3, 7, 7, 8, /* inter4 */ + 9, 0, 0, 0, /* Stuffing */ + 11, 13, 13, 13,/* inter4Q*/ +}; + +static const uint8_t h263_mbtype_b_tab[15][2] = { + {1, 1}, + {3, 3}, + {1, 5}, + {4, 4}, + {5, 4}, + {6, 6}, + {2, 4}, + {3, 4}, + {7, 6}, + {4, 6}, + {5, 6}, + {1, 6}, + {1,10}, + {1, 7}, + {1, 8}, +}; + +static const int h263_mb_type_b_map[15]= { + MB_TYPE_DIRECT2 | MB_TYPE_L0L1, + MB_TYPE_DIRECT2 | MB_TYPE_L0L1 | MB_TYPE_CBP, + MB_TYPE_DIRECT2 | MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT, + MB_TYPE_L0 | MB_TYPE_16x16, 
+ MB_TYPE_L0 | MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_L0 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16, + MB_TYPE_L1 | MB_TYPE_16x16, + MB_TYPE_L1 | MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16, + MB_TYPE_L0L1 | MB_TYPE_16x16, + MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_L0L1 | MB_TYPE_CBP | MB_TYPE_QUANT | MB_TYPE_16x16, + 0, //stuffing + MB_TYPE_INTRA4x4 | MB_TYPE_CBP, + MB_TYPE_INTRA4x4 | MB_TYPE_CBP | MB_TYPE_QUANT, +}; + +const uint8_t cbpc_b_tab[4][2] = { +{0, 1}, +{2, 2}, +{7, 3}, +{6, 3}, +}; + +const uint8_t cbpy_tab[16][2] = +{ + {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4}, + {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2} +}; + +const uint8_t mvtab[33][2] = +{ + {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7}, + {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, + {12,10}, {11,10}, {10,10}, {9,10}, {8,10}, {7,10}, {6,10}, {5,10}, + {4,10}, {7,11}, {6,11}, {5,11}, {4,11}, {3,11}, {2,11}, {3,12}, + {2,12} +}; + +/* third non intra table */ +const uint16_t inter_vlc[103][2] = { +{ 0x2, 2 },{ 0xf, 4 },{ 0x15, 6 },{ 0x17, 7 }, +{ 0x1f, 8 },{ 0x25, 9 },{ 0x24, 9 },{ 0x21, 10 }, +{ 0x20, 10 },{ 0x7, 11 },{ 0x6, 11 },{ 0x20, 11 }, +{ 0x6, 3 },{ 0x14, 6 },{ 0x1e, 8 },{ 0xf, 10 }, +{ 0x21, 11 },{ 0x50, 12 },{ 0xe, 4 },{ 0x1d, 8 }, +{ 0xe, 10 },{ 0x51, 12 },{ 0xd, 5 },{ 0x23, 9 }, +{ 0xd, 10 },{ 0xc, 5 },{ 0x22, 9 },{ 0x52, 12 }, +{ 0xb, 5 },{ 0xc, 10 },{ 0x53, 12 },{ 0x13, 6 }, +{ 0xb, 10 },{ 0x54, 12 },{ 0x12, 6 },{ 0xa, 10 }, +{ 0x11, 6 },{ 0x9, 10 },{ 0x10, 6 },{ 0x8, 10 }, +{ 0x16, 7 },{ 0x55, 12 },{ 0x15, 7 },{ 0x14, 7 }, +{ 0x1c, 8 },{ 0x1b, 8 },{ 0x21, 9 },{ 0x20, 9 }, +{ 0x1f, 9 },{ 0x1e, 9 },{ 0x1d, 9 },{ 0x1c, 9 }, +{ 0x1b, 9 },{ 0x1a, 9 },{ 0x22, 11 },{ 0x23, 11 }, +{ 0x56, 12 },{ 0x57, 12 },{ 0x7, 4 },{ 0x19, 9 }, +{ 0x5, 11 },{ 0xf, 6 },{ 0x4, 11 },{ 0xe, 6 }, +{ 0xd, 6 },{ 0xc, 6 },{ 0x13, 7 },{ 0x12, 7 }, +{ 0x11, 7 },{ 0x10, 7 },{ 0x1a, 8 },{ 0x19, 8 
}, +{ 0x18, 8 },{ 0x17, 8 },{ 0x16, 8 },{ 0x15, 8 }, +{ 0x14, 8 },{ 0x13, 8 },{ 0x18, 9 },{ 0x17, 9 }, +{ 0x16, 9 },{ 0x15, 9 },{ 0x14, 9 },{ 0x13, 9 }, +{ 0x12, 9 },{ 0x11, 9 },{ 0x7, 10 },{ 0x6, 10 }, +{ 0x5, 10 },{ 0x4, 10 },{ 0x24, 11 },{ 0x25, 11 }, +{ 0x26, 11 },{ 0x27, 11 },{ 0x58, 12 },{ 0x59, 12 }, +{ 0x5a, 12 },{ 0x5b, 12 },{ 0x5c, 12 },{ 0x5d, 12 }, +{ 0x5e, 12 },{ 0x5f, 12 },{ 0x3, 7 }, +}; + +const int8_t inter_level[102] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 1, 2, 3, 4, + 5, 6, 1, 2, 3, 4, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 3, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; + +const int8_t inter_run[102] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, + 3, 4, 4, 4, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 0, 0, 0, 1, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, +}; + +static RLTable rl_inter = { + 102, + 58, + inter_vlc, + inter_run, + inter_level, +}; + +const uint16_t intra_vlc_aic[103][2] = { +{ 0x2, 2 }, { 0x6, 3 }, { 0xe, 4 }, { 0xc, 5 }, +{ 0xd, 5 }, { 0x10, 6 }, { 0x11, 6 }, { 0x12, 6 }, +{ 0x16, 7 }, { 0x1b, 8 }, { 0x20, 9 }, { 0x21, 9 }, +{ 0x1a, 9 }, { 0x1b, 9 }, { 0x1c, 9 }, { 0x1d, 9 }, +{ 0x1e, 9 }, { 0x1f, 9 }, { 0x23, 11 }, { 0x22, 11 }, +{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 }, +{ 0x53, 12 }, { 0xf, 4 }, { 0x14, 6 }, { 0x14, 7 }, +{ 0x1e, 8 }, { 0xf, 10 }, { 0x21, 11 }, { 0x50, 12 }, +{ 0xb, 5 }, { 0x15, 7 }, { 0xe, 10 }, { 0x9, 10 }, +{ 0x15, 6 }, { 0x1d, 8 }, { 0xd, 10 }, { 0x51, 12 }, +{ 0x13, 6 }, { 0x23, 9 }, { 0x7, 11 }, { 0x17, 7 }, +{ 0x22, 9 }, { 0x52, 12 }, { 0x1c, 8 }, { 0xc, 10 }, +{ 0x1f, 8 }, { 0xb, 10 
}, { 0x25, 9 }, { 0xa, 10 }, +{ 0x24, 9 }, { 0x6, 11 }, { 0x21, 10 }, { 0x20, 10 }, +{ 0x8, 10 }, { 0x20, 11 }, { 0x7, 4 }, { 0xc, 6 }, +{ 0x10, 7 }, { 0x13, 8 }, { 0x11, 9 }, { 0x12, 9 }, +{ 0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 }, +{ 0xf, 6 }, { 0x13, 9 }, { 0x5, 10 }, { 0x25, 11 }, +{ 0xe, 6 }, { 0x14, 9 }, { 0x24, 11 }, { 0xd, 6 }, +{ 0x6, 10 }, { 0x5e, 12 }, { 0x11, 7 }, { 0x7, 10 }, +{ 0x13, 7 }, { 0x5d, 12 }, { 0x12, 7 }, { 0x5c, 12 }, +{ 0x14, 8 }, { 0x5b, 12 }, { 0x15, 8 }, { 0x1a, 8 }, +{ 0x19, 8 }, { 0x18, 8 }, { 0x17, 8 }, { 0x16, 8 }, +{ 0x19, 9 }, { 0x15, 9 }, { 0x16, 9 }, { 0x18, 9 }, +{ 0x17, 9 }, { 0x4, 11 }, { 0x5, 11 }, { 0x58, 12 }, +{ 0x59, 12 }, { 0x5a, 12 }, { 0x3, 7 }, +}; + +const int8_t intra_run_aic[102] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 5, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 11, +12, 13, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 3, 3, 3, 4, 4, + 5, 5, 6, 6, 7, 7, 8, 9, +10, 11, 12, 13, 14, 15, 16, 17, +18, 19, 20, 21, 22, 23, +}; + +const int8_t intra_level_aic[102] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, +17, 18, 19, 20, 21, 22, 23, 24, +25, 1, 2, 3, 4, 5, 6, 7, + 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 1, 2, 3, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 1, + 1, 1, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 1, 2, 3, 4, + 1, 2, 3, 1, 2, 3, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; + +static RLTable rl_intra_aic = { + 102, + 58, + intra_vlc_aic, + intra_run_aic, + intra_level_aic, +}; + +static const uint8_t wrong_run[102] = { + 1, 2, 3, 5, 4, 10, 9, 8, +11, 15, 17, 16, 23, 22, 21, 20, +19, 18, 25, 24, 27, 26, 11, 7, + 6, 1, 2, 13, 2, 2, 2, 2, + 6, 12, 3, 9, 1, 3, 4, 3, + 7, 4, 1, 1, 5, 5, 14, 6, + 1, 7, 1, 8, 1, 1, 1, 1, +10, 1, 1, 5, 9, 17, 25, 24, +29, 33, 32, 41, 2, 23, 28, 31, + 3, 22, 30, 4, 27, 40, 8, 26, + 6, 39, 7, 38, 16, 37, 15, 10, +11, 12, 13, 14, 1, 21, 20, 
18, +19, 2, 1, 34, 35, 36 +}; + +static const uint16_t h263_format[8][2] = { + { 0, 0 }, + { 128, 96 }, + { 176, 144 }, + { 352, 288 }, + { 704, 576 }, + { 1408, 1152 }, +}; + +const uint8_t ff_aic_dc_scale_table[32]={ +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0, 2, 4, 6, 8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62 +}; + +static const uint8_t modified_quant_tab[2][32]={ +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 +{ + 0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,10,11,12,13,14,15,16,17,18,18,19,20,21,22,23,24,25,26,27,28 +},{ + 0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,27,28,29,30,31,31,31,26 +} +}; + +const uint8_t ff_h263_chroma_qscale_table[32]={ +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0, 1, 2, 3, 4, 5, 6, 6, 7, 8, 9, 9,10,10,11,11,12,12,12,13,13,13,14,14,14,14,14,15,15,15,15,15 +}; + +const uint16_t ff_mba_max[6]={ + 47, 98, 395,1583,6335,9215 +}; + +const uint8_t ff_mba_length[7]={ + 6, 7, 9, 11, 13, 14, 14 +}; + +const uint8_t ff_h263_loop_filter_strength[32]={ +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9,10,10,10,11,11,11,12,12,12 +}; + diff --git a/mpeg4/src/libavcodec/h263dec.c b/mpeg4/src/libavcodec/h263dec.c new file mode 100644 index 0000000000000000000000000000000000000000..30303f8edf2f365382b8cd27fc54b64b5fdd8881 --- /dev/null +++ b/mpeg4/src/libavcodec/h263dec.c @@ -0,0 +1,895 @@ +/* + * H.263 decoder + * Copyright (c) 2001 Fabrice Bellard. 
+ * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file h263dec.c + * H.263 decoder. + */ + +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +//#define DEBUG +//#define PRINT_FRAME_TIME + +int ff_h263_decode_init(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + s->avctx = avctx; + s->out_format = FMT_H263; + + s->width = avctx->coded_width; + s->height = avctx->coded_height; + s->workaround_bugs= avctx->workaround_bugs; + + // set defaults + MPV_decode_defaults(s); + s->quant_precision=5; + s->decode_mb= ff_h263_decode_mb; + s->low_delay= 1; + avctx->pix_fmt= PIX_FMT_YUV420P; + s->unrestricted_mv= 1; + + /* select sub codec */ + switch(avctx->codec->id) { + case CODEC_ID_H263: + s->unrestricted_mv= 0; + break; + case CODEC_ID_MPEG4: + s->decode_mb= ff_mpeg4_decode_mb; + s->time_increment_bits = 4; /* default value for broken headers */ + s->h263_pred = 1; + s->low_delay = 0; //default, might be overriden in the vol header during header parsing + break; + case CODEC_ID_MSMPEG4V1: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=1; + break; + case CODEC_ID_MSMPEG4V2: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=2; + break; + case 
CODEC_ID_MSMPEG4V3: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=3; + break; + case CODEC_ID_WMV1: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=4; + break; + case CODEC_ID_WMV2: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=5; + break; + case CODEC_ID_WMV3: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=6; + break; + case CODEC_ID_H263I: + break; + case CODEC_ID_FLV1: + s->h263_flv = 1; + break; + default: + return -1; + } + s->codec_id= avctx->codec->id; + + /* for h263, we allocate the images after having read the header */ + if (avctx->codec->id != CODEC_ID_H263 && avctx->codec->id != CODEC_ID_MPEG4) + if (MPV_common_init(s) < 0) + return -1; + + if (s->h263_msmpeg4) + ff_msmpeg4_decode_init(s); + else + h263_decode_init_vlc(s); + + return 0; +} + +int ff_h263_decode_end(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + MPV_common_end(s); + return 0; +} + +/** + * returns the number of bytes consumed for building the current frame + */ +static int get_consumed_bytes(MpegEncContext *s, int buf_size){ + int pos= (get_bits_count(&s->gb)+7)>>3; + + if(s->divx_packed){ + //we would have to scan through the whole buf to handle the weird reordering ... + return buf_size; + }else if(s->flags&CODEC_FLAG_TRUNCATED){ + pos -= s->parse_context.last_index; + if(pos<0) pos=0; // padding is not really read so this might be -1 + return pos; + }else{ + if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...) + if(pos+10>buf_size) pos=buf_size; // oops ;) + + return pos; + } +} + +static int decode_slice(MpegEncContext *s){ + const int part_mask= s->partitioned_frame ? 
(AC_END|AC_ERROR) : 0x7F; + const int mb_size= 16>>s->avctx->lowres; + s->last_resync_gb= s->gb; + s->first_slice_line= 1; + + s->resync_mb_x= s->mb_x; + s->resync_mb_y= s->mb_y; + + ff_set_qscale(s, s->qscale); + + if(s->partitioned_frame){ + const int qscale= s->qscale; + + if(s->codec_id==CODEC_ID_MPEG4){ + if(ff_mpeg4_decode_partitions(s) < 0) + return -1; + } + + /* restore variables which were modified */ + s->first_slice_line=1; + s->mb_x= s->resync_mb_x; + s->mb_y= s->resync_mb_y; + ff_set_qscale(s, qscale); + } + + for(; s->mb_y < s->mb_height; s->mb_y++) { + /* per-row end of slice checks */ + if(s->msmpeg4_version){ + if(s->resync_mb_y + s->slice_height == s->mb_y){ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END); + + return 0; + } + } + + if(s->msmpeg4_version==1){ + s->last_dc[0]= + s->last_dc[1]= + s->last_dc[2]= 128; + } + + ff_init_block_index(s); + for(; s->mb_x < s->mb_width; s->mb_x++) { + int ret; + + ff_update_block_index(s); + + if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){ + s->first_slice_line=0; + } + + /* DCT & quantize */ + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; +// s->mb_skipped = 0; +//printf("%d %d %06X\n", ret, get_bits_count(&s->gb), show_bits(&s->gb, 24)); + ret= s->decode_mb(s, s->block); + + if (s->pict_type!=B_TYPE) + ff_h263_update_motion_val(s); + + if(ret<0){ + const int xy= s->mb_x + s->mb_y*s->mb_stride; + if(ret==SLICE_END){ + MPV_decode_mb(s, s->block); + if(s->loop_filter) + ff_h263_loop_filter(s); + +//printf("%d %d %d %06X\n", s->mb_x, s->mb_y, s->gb.size*8 - get_bits_count(&s->gb), show_bits(&s->gb, 24)); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + s->padding_bug_score--; + + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); + s->mb_y++; + } + return 0; + }else if(ret==SLICE_NOEND){ + av_log(s->avctx, AV_LOG_ERROR, "Slice 
mismatch at MB: %d\n", xy); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x+1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + return -1; + } + av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", xy); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + + return -1; + } + + MPV_decode_mb(s, s->block); + if(s->loop_filter) + ff_h263_loop_filter(s); + } + + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); + + s->mb_x= 0; + } + + assert(s->mb_x==0 && s->mb_y==s->mb_height); + + /* try to detect the padding bug */ + if( s->codec_id==CODEC_ID_MPEG4 + && (s->workaround_bugs&FF_BUG_AUTODETECT) + && s->gb.size_in_bits - get_bits_count(&s->gb) >=0 + && s->gb.size_in_bits - get_bits_count(&s->gb) < 48 +// && !s->resync_marker + && !s->data_partitioning){ + + const int bits_count= get_bits_count(&s->gb); + const int bits_left = s->gb.size_in_bits - bits_count; + + if(bits_left==0){ + s->padding_bug_score+=16; + } else if(bits_left != 1){ + int v= show_bits(&s->gb, 8); + v|= 0x7F >> (7-(bits_count&7)); + + if(v==0x7F && bits_left<=8) + s->padding_bug_score--; + else if(v==0x7F && ((get_bits_count(&s->gb)+8)&8) && bits_left<=16) + s->padding_bug_score+= 4; + else + s->padding_bug_score++; + } + } + + if(s->workaround_bugs&FF_BUG_AUTODETECT){ + if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/) + s->workaround_bugs |= FF_BUG_NO_PADDING; + else + s->workaround_bugs &= ~FF_BUG_NO_PADDING; + } + + // handle formats which don't have unique end markers + if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly + int left= s->gb.size_in_bits - get_bits_count(&s->gb); + int max_extra=7; + + /* no markers in M$ crap */ + if(s->msmpeg4_version && s->pict_type==I_TYPE) + max_extra+= 17; + + /* buggy padding but the frame should still end approximately at the bitstream end */ + if((s->workaround_bugs&FF_BUG_NO_PADDING) && 
s->error_resilience>=3) + max_extra+= 48; + else if((s->workaround_bugs&FF_BUG_NO_PADDING)) + max_extra+= 256*256*256*64; + + if(left>max_extra){ + av_log(s->avctx, AV_LOG_ERROR, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24)); + } + else if(left<0){ + av_log(s->avctx, AV_LOG_ERROR, "overreading %d bits\n", -left); + }else + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END); + + return 0; + } + + av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n", + s->gb.size_in_bits - get_bits_count(&s->gb), + show_bits(&s->gb, 24), s->padding_bug_score); + + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return -1; +} + +/** + * finds the end of the current frame in the bitstream. + * @return the position of the first byte of the next frame, or -1 + */ +int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){ + int vop_found, i; + uint32_t state; + + vop_found= pc->frame_start_found; + state= pc->state; + + i=0; + if(!vop_found){ + for(i=0; iframe_start_found=0; + pc->state=-1; + return i-3; + } + } + } + pc->frame_start_found= vop_found; + pc->state= state; + return END_NOT_FOUND; +} + +static int h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){ + int vop_found, i; + uint32_t state; + + vop_found= pc->frame_start_found; + state= pc->state; + + i=0; + if(!vop_found){ + for(i=0; i>(32-22) == 0x20){ + i++; + vop_found=1; + break; + } + } + } + + if(vop_found){ + for(; i>(32-22) == 0x20){ + pc->frame_start_found=0; + pc->state=-1; + return i-3; + } + } + } + pc->frame_start_found= vop_found; + pc->state= state; + + return END_NOT_FOUND; +} + +static int h263_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + ParseContext *pc = s->priv_data; + int next; + + next= 
h263_find_frame_end(pc, buf, buf_size); + + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} + +int ff_h263_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + MpegEncContext *s = avctx->priv_data; + int ret; + AVFrame *pict = data; + +#ifdef PRINT_FRAME_TIME +uint64_t time= rdtsc(); +#endif +#ifdef DEBUG + printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); + printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); +#endif + s->flags= avctx->flags; + s->flags2= avctx->flags2; + + /* no supplementary picture */ + if (buf_size == 0) { + /* special case for last picture */ + if (s->low_delay==0 && s->next_picture_ptr) { + *pict= *(AVFrame*)s->next_picture_ptr; + s->next_picture_ptr= NULL; + + *data_size = sizeof(AVFrame); + } + + return 0; + } + + if(s->flags&CODEC_FLAG_TRUNCATED){ + int next; + + if(s->codec_id==CODEC_ID_MPEG4){ + next= ff_mpeg4_find_frame_end(&s->parse_context, buf, buf_size); + }else if(s->codec_id==CODEC_ID_H263){ + next= h263_find_frame_end(&s->parse_context, buf, buf_size); + }else{ + av_log(s->avctx, AV_LOG_ERROR, "this codec does not support truncated bitstreams\n"); + return -1; + } + + if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 ) + return buf_size; + } + + +retry: + + if(s->bitstream_buffer_size && (s->divx_packed || buf_size<20)){ //divx 5.01+/xvid frame reorder + init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8); + }else + init_get_bits(&s->gb, buf, buf_size*8); + s->bitstream_buffer_size=0; + + if (!s->context_initialized) { + if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix + return -1; + } + + //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there + if(s->current_picture_ptr==NULL || 
s->current_picture_ptr->data[0]){ + int i= ff_find_unused_picture(s, 0); + s->current_picture_ptr= &s->picture[i]; + } + + /* let's go :-) */ + if (s->msmpeg4_version==5) { + ret= ff_wmv2_decode_picture_header(s); + } else if (s->msmpeg4_version) { + ret = msmpeg4_decode_picture_header(s); + } else if (s->h263_pred) { + if(s->avctx->extradata_size && s->picture_number==0){ + GetBitContext gb; + + init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8); + ret = ff_mpeg4_decode_picture_header(s, &gb); + } + ret = ff_mpeg4_decode_picture_header(s, &s->gb); + + if(s->flags& CODEC_FLAG_LOW_DELAY) + s->low_delay=1; + } else if (s->codec_id == CODEC_ID_H263I) { + ret = intel_h263_decode_picture_header(s); + } else if (s->h263_flv) { + ret = flv_h263_decode_picture_header(s); + } else { + ret = h263_decode_picture_header(s); + } + + if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_size); + + /* skip if the header was thrashed */ + if (ret < 0){ + av_log(s->avctx, AV_LOG_ERROR, "header damaged\n"); + return -1; + } + + avctx->has_b_frames= !s->low_delay; + + if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){ + if(s->avctx->stream_codec_tag == ff_get_fourcc("XVID") || + s->avctx->codec_tag == ff_get_fourcc("XVID") || s->avctx->codec_tag == ff_get_fourcc("XVIX") || + s->avctx->codec_tag == ff_get_fourcc("RMP4")) + s->xvid_build= -1; +#if 0 + if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1 + && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc + s->xvid_build= -1; +#endif + } + + if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){ + if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0) + s->divx_version= 400; //divx 4 + } + + if(s->xvid_build && s->divx_version){ + s->divx_version= + s->divx_build= 0; + } + + if(s->workaround_bugs&FF_BUG_AUTODETECT){ + if(s->avctx->codec_tag == ff_get_fourcc("XVIX")) + 
s->workaround_bugs|= FF_BUG_XVID_ILACE; + + if(s->avctx->codec_tag == ff_get_fourcc("UMP4")){ + s->workaround_bugs|= FF_BUG_UMP4; + } + + if(s->divx_version>=500){ + s->workaround_bugs|= FF_BUG_QPEL_CHROMA; + } + + if(s->divx_version>502){ + s->workaround_bugs|= FF_BUG_QPEL_CHROMA2; + } + + if(s->xvid_build && s->xvid_build<=3) + s->padding_bug_score= 256*256*256*64; + + if(s->xvid_build && s->xvid_build<=1) + s->workaround_bugs|= FF_BUG_QPEL_CHROMA; + + if(s->xvid_build && s->xvid_build<=12) + s->workaround_bugs|= FF_BUG_EDGE; + + if(s->xvid_build && s->xvid_build<=32) + s->workaround_bugs|= FF_BUG_DC_CLIP; + +#define SET_QPEL_FUNC(postfix1, postfix2) \ + s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\ + s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\ + s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2; + + if(s->lavc_build && s->lavc_build<4653) + s->workaround_bugs|= FF_BUG_STD_QPEL; + + if(s->lavc_build && s->lavc_build<4655) + s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE; + + if(s->lavc_build && s->lavc_build<4670){ + s->workaround_bugs|= FF_BUG_EDGE; + } + + if(s->lavc_build && s->lavc_build<=4712) + s->workaround_bugs|= FF_BUG_DC_CLIP; + + if(s->divx_version) + s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE; +//printf("padding_bug_score: %d\n", s->padding_bug_score); + if(s->divx_version==501 && s->divx_build==20020416) + s->padding_bug_score= 256*256*256*64; + + if(s->divx_version && s->divx_version<500){ + s->workaround_bugs|= FF_BUG_EDGE; + } + + if(s->divx_version) + s->workaround_bugs|= FF_BUG_HPEL_CHROMA; +#if 0 + if(s->divx_version==500) + s->padding_bug_score= 256*256*256*64; + + /* very ugly XVID padding bug detection FIXME/XXX solve this differently + * lets hope this at least works + */ + if( s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0 + && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0) + s->workaround_bugs|= FF_BUG_NO_PADDING; + + if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 
4609 file seems ok + s->workaround_bugs|= FF_BUG_NO_PADDING; +#endif + } + + if(s->workaround_bugs& FF_BUG_STD_QPEL){ + SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_old_c) + + SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_old_c) + SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_old_c) + } + + if(avctx->debug & FF_DEBUG_BUGS) + av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n", + s->workaround_bugs, s->lavc_build, s->xvid_build, s->divx_version, s->divx_build, + s->divx_packed ? "p" : ""); + +#if 0 // dump bits per frame / qp / complexity +{ + static FILE *f=NULL; + if(!f) f=fopen("rate_qp_cplx.txt", "w"); + fprintf(f, "%d %d %f\n", buf_size, s->qscale, buf_size*(double)s->qscale); +} +#endif + +#if defined(HAVE_MMX) && defined(CONFIG_GPL) + if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX)){ + avctx->idct_algo= FF_IDCT_XVIDMMX; + avctx->coded_width= 0; // force reinit +// dsputil_init(&s->dsp, avctx); + s->picture_number=0; + } +#endif + + /* After H263 & mpeg4 header decode we have the height, width,*/ + /* and other parameters. 
So then we could init the picture */ + /* FIXME: By the way H263 decoder is evolving it should have */ + /* an H263EncContext */ + + if ( s->width != avctx->coded_width + || s->height != avctx->coded_height) { + /* H.263 could change picture size any time */ + ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat + s->parse_context.buffer=0; + MPV_common_end(s); + s->parse_context= pc; + } + if (!s->context_initialized) { + avcodec_set_dimensions(avctx, s->width, s->height); + + goto retry; + } + + if((s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)) + s->gob_index = ff_h263_get_gob_height(s); + + // for hurry_up==5 + s->current_picture.pict_type= s->pict_type; + s->current_picture.key_frame= s->pict_type == I_TYPE; + + /* skip B-frames if we don't have reference frames */ + if(s->last_picture_ptr==NULL && (s->pict_type==B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size); + /* skip b frames if we are in a hurry */ + if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); + if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE) + || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE) + || avctx->skip_frame >= AVDISCARD_ALL) + return get_consumed_bytes(s, buf_size); + /* skip everything if we are in a hurry>=5 */ + if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size); + + if(s->next_p_frame_damaged){ + if(s->pict_type==B_TYPE) + return get_consumed_bytes(s, buf_size); + else + s->next_p_frame_damaged=0; + } + + if(MPV_frame_start(s, avctx) < 0) + return -1; + +#ifdef DEBUG + av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale); +#endif + + ff_er_frame_start(s); + + //the second part of the wmv2 header contains the MB skip bits which are stored in current_picture->mb_type + //which isnt available before MPV_frame_start() + if (s->msmpeg4_version==5){ + if(ff_wmv2_decode_secondary_picture_header(s) < 0) + return -1; + } + + /* decode each macroblock */ + 
s->mb_x=0; + s->mb_y=0; + + decode_slice(s); + while(s->mb_ymb_height){ + if(s->msmpeg4_version){ + if(s->mb_x!=0 || (s->mb_y%s->slice_height)!=0 || get_bits_count(&s->gb) > s->gb.size_in_bits) + break; + }else{ + if(ff_h263_resync(s)<0) + break; + } + + if(s->msmpeg4_version<4 && s->h263_pred) + ff_mpeg4_clean_buffers(s); + + decode_slice(s); + } + + if (s->h263_msmpeg4 && s->msmpeg4_version<4 && s->pict_type==I_TYPE) + if(msmpeg4_decode_ext_header(s, buf_size) < 0){ + s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR; + } + + /* divx 5.01+ bistream reorder stuff */ + if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0 && s->divx_packed){ + int current_pos= get_bits_count(&s->gb)>>3; + int startcode_found=0; + + if(buf_size - current_pos > 5){ + int i; + for(i=current_pos; igb.buffer == s->bitstream_buffer && buf_size>20){ //xvid style + startcode_found=1; + current_pos=0; + } + + if(startcode_found){ + s->bitstream_buffer= av_fast_realloc( + s->bitstream_buffer, + &s->allocated_bitstream_buffer_size, + buf_size - current_pos + FF_INPUT_BUFFER_PADDING_SIZE); + memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos); + s->bitstream_buffer_size= buf_size - current_pos; + } + } + + ff_er_frame_end(s); + + MPV_frame_end(s); + +assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type); +assert(s->current_picture.pict_type == s->pict_type); + if (s->pict_type == B_TYPE || s->low_delay) { + *pict= *(AVFrame*)s->current_picture_ptr; + } else if (s->last_picture_ptr != NULL) { + *pict= *(AVFrame*)s->last_picture_ptr; + } + + if(s->last_picture_ptr || s->low_delay){ + *data_size = sizeof(AVFrame); + ff_print_debug_info(s, pict); + } + + /* Return the Picture timestamp as the frame number */ + /* we substract 1 because it is added on utils.c */ + avctx->frame_number = s->picture_number - 1; + +#ifdef PRINT_FRAME_TIME +av_log(avctx, AV_LOG_DEBUG, "%Ld\n", rdtsc()-time); +#endif + + return get_consumed_bytes(s, buf_size); 
+} + +AVCodec mpeg4_decoder = { + "mpeg4", + CODEC_TYPE_VIDEO, + CODEC_ID_MPEG4, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, + .flush= ff_mpeg_flush, +}; + +AVCodec h263_decoder = { + "h263", + CODEC_TYPE_VIDEO, + CODEC_ID_H263, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, + .flush= ff_mpeg_flush, +}; + +AVCodec msmpeg4v1_decoder = { + "msmpeg4v1", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V1, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec msmpeg4v2_decoder = { + "msmpeg4v2", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V2, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec msmpeg4v3_decoder = { + "msmpeg4", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4V3, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec wmv1_decoder = { + "wmv1", + CODEC_TYPE_VIDEO, + CODEC_ID_WMV1, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec h263i_decoder = { + "h263i", + CODEC_TYPE_VIDEO, + CODEC_ID_H263I, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec flv_decoder = { + "flv", + CODEC_TYPE_VIDEO, + CODEC_ID_FLV1, + sizeof(MpegEncContext), + ff_h263_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 +}; + 
+AVCodecParser h263_parser = { + { CODEC_ID_H263 }, + sizeof(ParseContext), + NULL, + h263_parse, + ff_parse_close, +}; diff --git a/mpeg4/src/libavcodec/h264.c b/mpeg4/src/libavcodec/h264.c new file mode 100644 index 0000000000000000000000000000000000000000..15fdfcfc3abfb24f72d60a6b152da633743fcfbf --- /dev/null +++ b/mpeg4/src/libavcodec/h264.c @@ -0,0 +1,8090 @@ +/* + * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file h264.c + * H.264 / AVC / MPEG4 part10 codec. 
+ * @author Michael Niedermayer + */ + +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" +#include "h264data.h" +#include "golomb.h" + +#include "cabac.h" + +#undef NDEBUG +#include + +#define interlaced_dct interlaced_dct_is_a_bad_name +#define mb_intra mb_intra_isnt_initalized_see_mb_type + +#define LUMA_DC_BLOCK_INDEX 25 +#define CHROMA_DC_BLOCK_INDEX 26 + +#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 +#define COEFF_TOKEN_VLC_BITS 8 +#define TOTAL_ZEROS_VLC_BITS 9 +#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 +#define RUN_VLC_BITS 3 +#define RUN7_VLC_BITS 6 + +#define MAX_SPS_COUNT 32 +#define MAX_PPS_COUNT 256 + +#define MAX_MMCO_COUNT 66 + +/** + * Sequence parameter set + */ +typedef struct SPS{ + + int profile_idc; + int level_idc; + int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag + int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4 + int poc_type; ///< pic_order_cnt_type + int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4 + int delta_pic_order_always_zero_flag; + int offset_for_non_ref_pic; + int offset_for_top_to_bottom_field; + int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle + int ref_frame_count; ///< num_ref_frames + int gaps_in_frame_num_allowed_flag; + int mb_width; ///< frame_width_in_mbs_minus1 + 1 + int mb_height; ///< frame_height_in_mbs_minus1 + 1 + int frame_mbs_only_flag; + int mb_aff; ///b4_stride + int b8_stride; + + int halfpel_flag; + int thirdpel_flag; + + int unknown_svq3_flag; + int next_slice_index; + + SPS sps_buffer[MAX_SPS_COUNT]; + SPS sps; ///< current sps + + PPS pps_buffer[MAX_PPS_COUNT]; + /** + * current pps + */ + PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? 
+ + uint32_t dequant4_buffer[6][52][16]; + uint32_t dequant8_buffer[2][52][64]; + uint32_t (*dequant4_coeff[6])[16]; + uint32_t (*dequant8_coeff[2])[64]; + int dequant_coeff_pps; ///< reinit tables when pps changes + + int slice_num; + uint8_t *slice_table_base; + uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 + int slice_type; + int slice_type_fixed; + + //interlacing specific flags + int mb_aff_frame; + int mb_field_decoding_flag; + + int sub_mb_type[4]; + + //POC stuff + int poc_lsb; + int poc_msb; + int delta_poc_bottom; + int delta_poc[2]; + int frame_num; + int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0 + int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0 + int frame_num_offset; ///< for POC type 2 + int prev_frame_num_offset; ///< for POC type 2 + int prev_frame_num; ///< frame_num of the last pic for POC type 1/2 + + /** + * frame_num for frames or 2*frame_num for field pics. + */ + int curr_pic_num; + + /** + * max_frame_num or 2*max_frame_num for field pics. + */ + int max_pic_num; + + //Weighted pred stuff + int use_weight; + int use_weight_chroma; + int luma_log2_weight_denom; + int chroma_log2_weight_denom; + int luma_weight[2][16]; + int luma_offset[2][16]; + int chroma_weight[2][16][2]; + int chroma_offset[2][16][2]; + int implicit_weight[16][16]; + + //deblock + int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 + int slice_alpha_c0_offset; + int slice_beta_offset; + + int redundant_pic_count; + + int direct_spatial_mv_pred; + int dist_scale_factor[16]; + int map_col_to_list0[2][16]; + + /** + * num_ref_idx_l0/1_active_minus1 + 1 + */ + int ref_count[2];// FIXME split for AFF + Picture *short_ref[32]; + Picture *long_ref[32]; + Picture default_ref_list[2][32]; + Picture ref_list[2][32]; //FIXME size? + Picture field_ref_list[2][32]; //FIXME size? + Picture *delayed_pic[16]; //FIXME size? + Picture *delayed_output_pic; + + /** + * memory management control operations buffer. 
+ */ + MMCO mmco[MAX_MMCO_COUNT]; + int mmco_index; + + int long_ref_count; ///< number of actual long term references + int short_ref_count; ///< number of actual short term references + + //data partitioning + GetBitContext intra_gb; + GetBitContext inter_gb; + GetBitContext *intra_gb_ptr; + GetBitContext *inter_gb_ptr; + + DECLARE_ALIGNED_8(DCTELEM, mb[16*24]); + + /** + * Cabac + */ + CABACContext cabac; + uint8_t cabac_state[460]; + int cabac_init_idc; + + /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ + uint16_t *cbp_table; + int top_cbp; + int left_cbp; + /* chroma_pred_mode for i4x4 or i16x16, else 0 */ + uint8_t *chroma_pred_mode_table; + int last_qscale_diff; + int16_t (*mvd_table[2])[2]; + DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]); + uint8_t *direct_table; + uint8_t direct_cache[5*8]; + + uint8_t zigzag_scan[16]; + uint8_t field_scan[16]; + uint8_t zigzag_scan8x8[64]; + uint8_t zigzag_scan8x8_cavlc[64]; + const uint8_t *zigzag_scan_q0; + const uint8_t *field_scan_q0; + const uint8_t *zigzag_scan8x8_q0; + const uint8_t *zigzag_scan8x8_cavlc_q0; + + int x264_build; +}H264Context; + +static VLC coeff_token_vlc[4]; +static VLC chroma_dc_coeff_token_vlc; + +static VLC total_zeros_vlc[15]; +static VLC chroma_dc_total_zeros_vlc[3]; + +static VLC run_vlc[6]; +static VLC run7_vlc; + +static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); +static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); +static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); + +static always_inline uint32_t pack16to32(int a, int b){ +#ifdef WORDS_BIGENDIAN + return (b&0xFFFF) + (a<<16); +#else + return (a&0xFFFF) + (b<<16); +#endif +} + +/** + * fill a rectangle. 
+ * @param h height of the rectangle, should be a constant + * @param w width of the rectangle, should be a constant + * @param size the size of val (1 or 4), should be a constant + */ +static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ + uint8_t *p= (uint8_t*)vp; + assert(size==1 || size==4); + + w *= size; + stride *= size; + + assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); + assert((stride&(w-1))==0); +//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it + if(w==2 && h==2){ + *(uint16_t*)(p + 0)= + *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101; + }else if(w==2 && h==4){ + *(uint16_t*)(p + 0*stride)= + *(uint16_t*)(p + 1*stride)= + *(uint16_t*)(p + 2*stride)= + *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101; + }else if(w==4 && h==1){ + *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101; + }else if(w==4 && h==2){ + *(uint32_t*)(p + 0*stride)= + *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101; + }else if(w==4 && h==4){ + *(uint32_t*)(p + 0*stride)= + *(uint32_t*)(p + 1*stride)= + *(uint32_t*)(p + 2*stride)= + *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101; + }else if(w==8 && h==1){ + *(uint32_t*)(p + 0)= + *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101; + }else if(w==8 && h==2){ + *(uint32_t*)(p + 0 + 0*stride)= + *(uint32_t*)(p + 4 + 0*stride)= + *(uint32_t*)(p + 0 + 1*stride)= + *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101; + }else if(w==8 && h==4){ + *(uint64_t*)(p + 0*stride)= + *(uint64_t*)(p + 1*stride)= + *(uint64_t*)(p + 2*stride)= + *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; + }else if(w==16 && h==2){ + *(uint64_t*)(p + 0+0*stride)= + *(uint64_t*)(p + 8+0*stride)= + *(uint64_t*)(p + 0+1*stride)= + *(uint64_t*)(p + 8+1*stride)= size==4 ? 
val*0x0100000001ULL : val*0x0101010101010101ULL; + }else if(w==16 && h==4){ + *(uint64_t*)(p + 0+0*stride)= + *(uint64_t*)(p + 8+0*stride)= + *(uint64_t*)(p + 0+1*stride)= + *(uint64_t*)(p + 8+1*stride)= + *(uint64_t*)(p + 0+2*stride)= + *(uint64_t*)(p + 8+2*stride)= + *(uint64_t*)(p + 0+3*stride)= + *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; + }else + assert(0); +} + +static void fill_caches(H264Context *h, int mb_type, int for_deblock){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int topleft_xy, top_xy, topright_xy, left_xy[2]; + int topleft_type, top_type, topright_type, left_type[2]; + int left_block[8]; + int i; + + //FIXME deblocking can skip fill_caches much of the time with multiple slices too. + // the actual condition is whether we're on the edge of a slice, + // and even then the intra and nnz parts are unnecessary. + if(for_deblock && h->slice_num == 1) + return; + + //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it + + top_xy = mb_xy - s->mb_stride; + topleft_xy = top_xy - 1; + topright_xy= top_xy + 1; + left_xy[1] = left_xy[0] = mb_xy-1; + left_block[0]= 0; + left_block[1]= 1; + left_block[2]= 2; + left_block[3]= 3; + left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 8; + left_block[7]= 11; + if(h->mb_aff_frame){ + const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; + const int top_pair_xy = pair_xy - s->mb_stride; + const int topleft_pair_xy = top_pair_xy - 1; + const int topright_pair_xy = top_pair_xy + 1; + const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); + const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); + const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); + const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); 
+ const int curr_mb_frame_flag = !IS_INTERLACED(mb_type); + const int bottom = (s->mb_y & 1); + tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag); + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock + ) { + top_xy -= s->mb_stride; + } + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock + ) { + topleft_xy -= s->mb_stride; + } + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock + ) { + topright_xy -= s->mb_stride; + } + if (left_mb_frame_flag != curr_mb_frame_flag) { + left_xy[1] = left_xy[0] = pair_xy - 1; + if (curr_mb_frame_flag) { + if (bottom) { + left_block[0]= 2; + left_block[1]= 2; + left_block[2]= 3; + left_block[3]= 3; + left_block[4]= 8; + left_block[5]= 11; + left_block[6]= 8; + left_block[7]= 11; + } else { + left_block[0]= 0; + left_block[1]= 0; + left_block[2]= 1; + left_block[3]= 1; + left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 7; + left_block[7]= 10; + } + } else { + left_xy[1] += s->mb_stride; + //left_block[0]= 0; + left_block[1]= 2; + left_block[2]= 0; + left_block[3]= 2; + //left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 7; + left_block[7]= 10; + } + } + } + + h->top_mb_xy = top_xy; + h->left_mb_xy[0] = left_xy[0]; + h->left_mb_xy[1] = left_xy[1]; + if(for_deblock){ + topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0; + top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0; + topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0; + left_type[0] = h->slice_table[left_xy[0] ] < 255 ? 
s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0; + }else{ + topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; + top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; + topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; + left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; + } + + if(IS_INTRA(mb_type)){ + h->topleft_samples_available= + h->top_samples_available= + h->left_samples_available= 0xFFFF; + h->topright_samples_available= 0xEEEA; + + if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){ + h->topleft_samples_available= 0xB3FF; + h->top_samples_available= 0x33FF; + h->topright_samples_available= 0x26EA; + } + for(i=0; i<2; i++){ + if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){ + h->topleft_samples_available&= 0xDF5F; + h->left_samples_available&= 0x5F5F; + } + } + + if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred)) + h->topleft_samples_available&= 0x7FFF; + + if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred)) + h->topright_samples_available&= 0xFBFF; + + if(IS_INTRA4x4(mb_type)){ + if(IS_INTRA4x4(top_type)){ + h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; + h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; + h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; + h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; + }else{ + int pred; + if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred)) + pred= -1; + else{ + pred= 2; + } + 
h->intra4x4_pred_mode_cache[4+8*0]= + h->intra4x4_pred_mode_cache[5+8*0]= + h->intra4x4_pred_mode_cache[6+8*0]= + h->intra4x4_pred_mode_cache[7+8*0]= pred; + } + for(i=0; i<2; i++){ + if(IS_INTRA4x4(left_type[i])){ + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; + }else{ + int pred; + if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred)) + pred= -1; + else{ + pred= 2; + } + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; + } + } + } + } + + +/* +0 . T T. T T T T +1 L . .L . . . . +2 L . .L . . . . +3 . T TL . . . . +4 L . .L . . . . +5 L . .. . . . . +*/ +//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec) + if(top_type){ + h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4]; + h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5]; + h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6]; + h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3]; + + h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9]; + h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8]; + + h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12]; + h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11]; + + }else{ + h->non_zero_count_cache[4+8*0]= + h->non_zero_count_cache[5+8*0]= + h->non_zero_count_cache[6+8*0]= + h->non_zero_count_cache[7+8*0]= + + h->non_zero_count_cache[1+8*0]= + h->non_zero_count_cache[2+8*0]= + + h->non_zero_count_cache[1+8*3]= + h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 
0 : 64; + + } + + for (i=0; i<2; i++) { + if(left_type[i]){ + h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]]; + h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]]; + h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]]; + h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]]; + }else{ + h->non_zero_count_cache[3+8*1 + 2*8*i]= + h->non_zero_count_cache[3+8*2 + 2*8*i]= + h->non_zero_count_cache[0+8*1 + 8*i]= + h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64; + } + } + + if( h->pps.cabac ) { + // top_cbp + if(top_type) { + h->top_cbp = h->cbp_table[top_xy]; + } else if(IS_INTRA(mb_type)) { + h->top_cbp = 0x1C0; + } else { + h->top_cbp = 0; + } + // left_cbp + if (left_type[0]) { + h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; + } else if(IS_INTRA(mb_type)) { + h->left_cbp = 0x1C0; + } else { + h->left_cbp = 0; + } + if (left_type[0]) { + h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; + } + if (left_type[1]) { + h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; + } + } + +#if 1 + //FIXME direct mb can skip much of this + if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ + int list; + for(list=0; list<1+(h->slice_type==B_TYPE); list++){ + if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){ + /*if(!h->mv_cache_clean[list]){ + memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? 
+ memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); + h->mv_cache_clean[list]= 1; + }*/ + continue; + } + h->mv_cache_clean[list]= 0; + + if(USES_LIST(top_type, list)){ + const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; + const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; + *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; + *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3]; + h->ref_cache[list][scan8[0] + 0 - 1*8]= + h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; + h->ref_cache[list][scan8[0] + 2 - 1*8]= + h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; + *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? 
LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; + } + + //FIXME unify cleanup or sth + if(USES_LIST(left_type[0], list)){ + const int b_xy= h->mb2b_xy[left_xy[0]] + 3; + const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]]; + h->ref_cache[list][scan8[0] - 1 + 0*8]= + h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0; + h->ref_cache[list][scan8[0] - 1 + 0*8]= + h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + } + + if(USES_LIST(left_type[1], list)){ + const int b_xy= h->mb2b_xy[left_xy[1]] + 3; + const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1; + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]]; + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]]; + h->ref_cache[list][scan8[0] - 1 + 2*8]= + h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]= + *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; + h->ref_cache[list][scan8[0] - 1 + 2*8]= + h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? 
LIST_NOT_USED : PART_NOT_AVAILABLE; + assert((!left_type[0]) == (!left_type[1])); + } + + if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) + continue; + + if(USES_LIST(topleft_type, list)){ + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; + const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; + h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; + }else{ + *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; + h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; + } + + if(USES_LIST(topright_type, list)){ + const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; + const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; + *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; + h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; + }else{ + *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; + h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? 
LIST_NOT_USED : PART_NOT_AVAILABLE; + } + + + h->ref_cache[list][scan8[5 ]+1] = + h->ref_cache[list][scan8[7 ]+1] = + h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) + h->ref_cache[list][scan8[4 ]] = + h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; + *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= + *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= + *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) + *(uint32_t*)h->mv_cache [list][scan8[4 ]]= + *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; + + if( h->pps.cabac ) { + /* XXX beurk, Load mvd */ + if(USES_LIST(topleft_type, list)){ + const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy]; + }else{ + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0; + } + + if(USES_LIST(top_type, list)){ + const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2]; + *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; + } + if(USES_LIST(left_type[0], list)){ + const int b_xy= h->mb2b_xy[left_xy[0]] + 3; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= + *(uint32_t*)h->mvd_cache 
[list][scan8[0] - 1 + 1*8]= 0; + } + if(USES_LIST(left_type[1], list)){ + const int b_xy= h->mb2b_xy[left_xy[1]] + 3; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; + *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; + }else{ + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= + *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; + } + *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= + *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= + *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) + *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= + *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; + + if(h->slice_type == B_TYPE){ + fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); + + if(IS_DIRECT(top_type)){ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; + }else if(IS_8X8(top_type)){ + int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; + h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; + h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; + }else{ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; + } + + //FIXME interlacing + if(IS_DIRECT(left_type[0])){ + h->direct_cache[scan8[0] - 1 + 0*8]= + h->direct_cache[scan8[0] - 1 + 2*8]= 1; + }else if(IS_8X8(left_type[0])){ + int b8_xy = h->mb2b8_xy[left_xy[0]] + 1; + h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy]; + h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride]; + }else{ + h->direct_cache[scan8[0] - 1 + 0*8]= + h->direct_cache[scan8[0] - 1 + 2*8]= 0; + } + } + } + } + } +#endif + + h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); +} + +static inline void write_back_intra_pred_mode(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + + h->intra4x4_pred_mode[mb_xy][0]= 
h->intra4x4_pred_mode_cache[7+8*1]; + h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2]; + h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3]; + h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4]; + h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4]; + h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4]; + h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4]; +} + +/** + * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. + */ +static inline int check_intra4x4_pred_mode(H264Context *h){ + MpegEncContext * const s = &h->s; + static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; + static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; + int i; + + if(!(h->top_samples_available&0x8000)){ + for(i=0; i<4; i++){ + int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; + if(status<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); + return -1; + } else if(status){ + h->intra4x4_pred_mode_cache[scan8[0] + i]= status; + } + } + } + + if(!(h->left_samples_available&0x8000)){ + for(i=0; i<4; i++){ + int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; + if(status<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); + return -1; + } else if(status){ + h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; + } + } + } + + return 0; +} //FIXME cleanup like next + +/** + * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. 
+ */ +static inline int check_intra_pred_mode(H264Context *h, int mode){ + MpegEncContext * const s = &h->s; + static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; + static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; + + if(mode < 0 || mode > 6) { + av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + if(!(h->top_samples_available&0x8000)){ + mode= top[ mode ]; + if(mode<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + } + + if(!(h->left_samples_available&0x8000)){ + mode= left[ mode ]; + if(mode<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + } + + return mode; +} + +/** + * gets the predicted intra4x4 prediction mode. + */ +static inline int pred_intra_mode(H264Context *h, int n){ + const int index8= scan8[n]; + const int left= h->intra4x4_pred_mode_cache[index8 - 1]; + const int top = h->intra4x4_pred_mode_cache[index8 - 8]; + const int min= FFMIN(left, top); + + tprintf("mode:%d %d min:%d\n", left ,top, min); + + if(min<0) return DC_PRED; + else return min; +} + +static inline void write_back_non_zero_count(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + + h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; + h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; + h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; + h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; + h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; + h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; + h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; + + h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; + h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; + 
h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; + + h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; + h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; + h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; +} + +/** + * gets the predicted number of non zero coefficients. + * @param n block index + */ +static inline int pred_non_zero_count(H264Context *h, int n){ + const int index8= scan8[n]; + const int left= h->non_zero_count_cache[index8 - 1]; + const int top = h->non_zero_count_cache[index8 - 8]; + int i= left + top; + + if(i<64) i= (i+1)>>1; + + tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31); + + return i&31; +} + +static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ + const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; + + if(topright_ref != PART_NOT_AVAILABLE){ + *C= h->mv_cache[list][ i - 8 + part_width ]; + return topright_ref; + }else{ + tprintf("topright MV not available\n"); + + *C= h->mv_cache[list][ i - 8 - 1 ]; + return h->ref_cache[list][ i - 8 - 1 ]; + } +} + +/** + * gets the predicted MV. + * @param n the block index + * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) + * @param mx the x component of the predicted motion vector + * @param my the y component of the predicted motion vector + */ +static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ + const int index8= scan8[n]; + const int top_ref= h->ref_cache[list][ index8 - 8 ]; + const int left_ref= h->ref_cache[list][ index8 - 1 ]; + const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; + const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; + const int16_t * C; + int diagonal_ref, match_count; + + assert(part_width==1 || part_width==2 || part_width==4); + +/* mv_cache + B . . A T T T T + U . . L . . , . + U . . L . . . . + U . . L . . , . + . . . L . . . . 
+*/ + + diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); + match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + tprintf("pred_motion match_count=%d\n", match_count); + if(match_count > 1){ //most common + *mx= mid_pred(A[0], B[0], C[0]); + *my= mid_pred(A[1], B[1], C[1]); + }else if(match_count==1){ + if(left_ref==ref){ + *mx= A[0]; + *my= A[1]; + }else if(top_ref==ref){ + *mx= B[0]; + *my= B[1]; + }else{ + *mx= C[0]; + *my= C[1]; + } + }else{ + if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ + *mx= A[0]; + *my= A[1]; + }else{ + *mx= mid_pred(A[0], B[0], C[0]); + *my= mid_pred(A[1], B[1], C[1]); + } + } + + tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); +} + +/** + * gets the directionally predicted 16x8 MV. + * @param n the block index + * @param mx the x component of the predicted motion vector + * @param my the y component of the predicted motion vector + */ +static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ + if(n==0){ + const int top_ref= h->ref_cache[list][ scan8[0] - 8 ]; + const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; + + tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + + if(top_ref == ref){ + *mx= B[0]; + *my= B[1]; + return; + } + }else{ + const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; + const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; + + tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + + if(left_ref == ref){ + *mx= A[0]; + *my= A[1]; + return; + } + } + + //RARE + pred_motion(h, n, 4, list, ref, mx, my); +} + +/** + * gets the directionally predicted 8x16 MV. 
+ * @param n the block index + * @param mx the x component of the predicted motion vector + * @param my the y component of the predicted motion vector + */ +static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){ + if(n==0){ + const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; + const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; + + tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + + if(left_ref == ref){ + *mx= A[0]; + *my= A[1]; + return; + } + }else{ + const int16_t * C; + int diagonal_ref; + + diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); + + tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + + if(diagonal_ref == ref){ + *mx= C[0]; + *my= C[1]; + return; + } + } + + //RARE + pred_motion(h, n, 2, list, ref, mx, my); +} + +static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){ + const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; + const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; + + tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); + + if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE + || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0) + || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){ + + *mx = *my = 0; + return; + } + + pred_motion(h, 0, 4, 0, 0, mx, my); + + return; +} + +static inline void direct_dist_scale_factor(H264Context * const h){ + const int poc = h->s.current_picture_ptr->poc; + const int poc1 = h->ref_list[1][0].poc; + int i; + for(i=0; iref_count[0]; i++){ + int poc0 = h->ref_list[0][i].poc; + int td = clip(poc1 - poc0, -128, 127); + if(td == 0 /* FIXME || pic0 is a long-term ref */){ + h->dist_scale_factor[i] = 256; + }else{ + int tb = clip(poc - poc0, -128, 127); + int tx = (16384 + 
(ABS(td) >> 1)) / td; + h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023); + } + } +} +static inline void direct_ref_list_init(H264Context * const h){ + MpegEncContext * const s = &h->s; + Picture * const ref1 = &h->ref_list[1][0]; + Picture * const cur = s->current_picture_ptr; + int list, i, j; + if(cur->pict_type == I_TYPE) + cur->ref_count[0] = 0; + if(cur->pict_type != B_TYPE) + cur->ref_count[1] = 0; + for(list=0; list<2; list++){ + cur->ref_count[list] = h->ref_count[list]; + for(j=0; jref_count[list]; j++) + cur->ref_poc[list][j] = h->ref_list[list][j].poc; + } + if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred) + return; + for(list=0; list<2; list++){ + for(i=0; iref_count[list]; i++){ + const int poc = ref1->ref_poc[list][i]; + h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */ + for(j=0; jref_count[list]; j++) + if(h->ref_list[list][j].poc == poc){ + h->map_col_to_list0[list][i] = j; + break; + } + } + } +} + +static inline void pred_direct_motion(H264Context * const h, int *mb_type){ + MpegEncContext * const s = &h->s; + const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; + const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride; + const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; + const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy]; + const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy]; + const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy]; + const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy]; + const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy]; + const int is_b8x8 = IS_8X8(*mb_type); + int sub_mb_type; + int i8, i4; + + if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){ + /* FIXME save sub mb types from previous frames (or derive from MVs) + * so we know exactly what block size to use */ + sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */ + *mb_type = 
MB_TYPE_8x8|MB_TYPE_L0L1; + }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){ + sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ + *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */ + }else{ + sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ + *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1; + } + if(!is_b8x8) + *mb_type |= MB_TYPE_DIRECT2; + + tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col); + + if(h->direct_spatial_mv_pred){ + int ref[2]; + int mv[2][2]; + int list; + + /* ref = min(neighbors) */ + for(list=0; list<2; list++){ + int refa = h->ref_cache[list][scan8[0] - 1]; + int refb = h->ref_cache[list][scan8[0] - 8]; + int refc = h->ref_cache[list][scan8[0] - 8 + 4]; + if(refc == -2) + refc = h->ref_cache[list][scan8[0] - 8 - 1]; + ref[list] = refa; + if(ref[list] < 0 || (refb < ref[list] && refb >= 0)) + ref[list] = refb; + if(ref[list] < 0 || (refc < ref[list] && refc >= 0)) + ref[list] = refc; + if(ref[list] < 0) + ref[list] = -1; + } + + if(ref[0] < 0 && ref[1] < 0){ + ref[0] = ref[1] = 0; + mv[0][0] = mv[0][1] = + mv[1][0] = mv[1][1] = 0; + }else{ + for(list=0; list<2; list++){ + if(ref[list] >= 0) + pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]); + else + mv[list][0] = mv[list][1] = 0; + } + } + + if(ref[1] < 0){ + *mb_type &= ~MB_TYPE_P0L1; + sub_mb_type &= ~MB_TYPE_P0L1; + }else if(ref[0] < 0){ + *mb_type &= ~MB_TYPE_P0L0; + sub_mb_type &= ~MB_TYPE_P0L0; + } + + if(IS_16X16(*mb_type)){ + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); + fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); + if(!IS_INTRA(mb_type_col) + && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1) + || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1 + && (h->x264_build>33 
|| !h->x264_build)))){ + if(ref[0] > 0) + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4); + else + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + if(ref[1] > 0) + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4); + else + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); + }else{ + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4); + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4); + } + }else{ + for(i8=0; i8<4; i8++){ + const int x8 = i8&1; + const int y8 = i8>>1; + + if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) + continue; + h->sub_mb_type[i8] = sub_mb_type; + + fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4); + fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4); + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); + + /* col_zero_flag */ + if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0 + || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0 + && (h->x264_build>33 || !h->x264_build)))){ + const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? 
l1mv0 : l1mv1; + if(IS_SUB_8X8(sub_mb_type)){ + const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride]; + if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){ + if(ref[0] == 0) + fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); + if(ref[1] == 0) + fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); + } + }else + for(i4=0; i4<4; i4++){ + const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; + if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){ + if(ref[0] == 0) + *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0; + if(ref[1] == 0) + *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0; + } + } + } + } + } + }else{ /* direct temporal mv pred */ + if(IS_16X16(*mb_type)){ + fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); + if(IS_INTRA(mb_type_col)){ + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); + }else{ + const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]] + : h->map_col_to_list0[1][l1ref1[0]]; + const int dist_scale_factor = h->dist_scale_factor[ref0]; + const int16_t *mv_col = l1ref0[0] >= 0 ? 
l1mv0[0] : l1mv1[0]; + int mv_l0[2]; + mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; + mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4); + fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4); + } + }else{ + for(i8=0; i8<4; i8++){ + const int x8 = i8&1; + const int y8 = i8>>1; + int ref0, dist_scale_factor; + const int16_t (*l1mv)[2]= l1mv0; + + if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) + continue; + h->sub_mb_type[i8] = sub_mb_type; + if(IS_INTRA(mb_type_col)){ + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); + fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); + continue; + } + + ref0 = l1ref0[x8 + y8*h->b8_stride]; + if(ref0 >= 0) + ref0 = h->map_col_to_list0[0][ref0]; + else{ + ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]]; + l1mv= l1mv1; + } + dist_scale_factor = h->dist_scale_factor[ref0]; + + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); + if(IS_SUB_8X8(sub_mb_type)){ + const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride]; + int mx = (dist_scale_factor * mv_col[0] + 128) >> 8; + int my = (dist_scale_factor * mv_col[1] + 128) >> 8; + fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); + fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4); + }else + for(i4=0; i4<4; i4++){ + const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; + int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]]; + mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; + mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + 
*(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = + pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); + } + } + } + } +} + +static inline void write_back_motion(H264Context *h, int mb_type){ + MpegEncContext * const s = &h->s; + const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; + const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; + int list; + + if(!USES_LIST(mb_type, 0)) + fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); + + for(list=0; list<2; list++){ + int y; + if(!USES_LIST(mb_type, list)) + continue; + + for(y=0; y<4; y++){ + *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; + *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; + } + if( h->pps.cabac ) { + for(y=0; y<4; y++){ + *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; + *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; + } + } + + { + uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; + ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; + ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; + ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; + ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; + } + } + + if(h->slice_type == B_TYPE && h->pps.cabac){ + if(IS_8X8(mb_type)){ + uint8_t *direct_table = &h->direct_table[b8_xy]; + direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; + direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; + direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; + } + } +} + +/** + * Decodes a network abstraction layer unit. 
+ * @param consumed is the number of bytes used as input + * @param length is the length of the array + * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing? + * @returns decoded bytes, might be src+1 if no escapes + */ +static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){ + int i, si, di; + uint8_t *dst; + +// src[0]&0x80; //forbidden bit + h->nal_ref_idc= src[0]>>5; + h->nal_unit_type= src[0]&0x1F; + + src++; length--; +#if 0 + for(i=0; i0 && src[i-1]==0) i--; + if(i+2=length-1){ //no escaped 0 + *dst_length= length; + *consumed= length+1; //+1 for the header + return src; + } + + h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length); + dst= h->rbsp_buffer; + +//printf("decoding esc\n"); + si=di=0; + while(si=0); + assert(dst_length>0); + + dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type; + + if(length==0) return 1; + + escape_count= 0; + for(i=0; i0 && src[i-1]==0) + i--; + if(i+2 dst_length) + return -1; + + //this should be damn rare (hopefully) + + h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count); + temp= h->rbsp_buffer; +//printf("encoding esc\n"); + + si= 0; + di= 0; + while(si < length){ + if(si+2>=1; + } + return 0; +} + +/** + * idct tranforms the 16 dc values and dequantize them. 
+ * @param qp quantization parameter + */ +static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ +#define stride 16 + int i; + int temp[16]; //FIXME check if this is a good idea + static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; + static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; + +//memset(block, 64, 2*256); +//return; + for(i=0; i<4; i++){ + const int offset= y_offset[i]; + const int z0= block[offset+stride*0] + block[offset+stride*4]; + const int z1= block[offset+stride*0] - block[offset+stride*4]; + const int z2= block[offset+stride*1] - block[offset+stride*5]; + const int z3= block[offset+stride*1] + block[offset+stride*5]; + + temp[4*i+0]= z0+z3; + temp[4*i+1]= z1+z2; + temp[4*i+2]= z1-z2; + temp[4*i+3]= z0-z3; + } + + for(i=0; i<4; i++){ + const int offset= x_offset[i]; + const int z0= temp[4*0+i] + temp[4*2+i]; + const int z1= temp[4*0+i] - temp[4*2+i]; + const int z2= temp[4*1+i] - temp[4*3+i]; + const int z3= temp[4*1+i] + temp[4*3+i]; + + block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual + block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8)); + block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8)); + block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8)); + } +} + +#if 0 +/** + * dct tranforms the 16 dc values. + * @param qp quantization parameter ??? 
FIXME + */ +static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){ +// const int qmul= dequant_coeff[qp][0]; + int i; + int temp[16]; //FIXME check if this is a good idea + static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride}; + static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride}; + + for(i=0; i<4; i++){ + const int offset= y_offset[i]; + const int z0= block[offset+stride*0] + block[offset+stride*4]; + const int z1= block[offset+stride*0] - block[offset+stride*4]; + const int z2= block[offset+stride*1] - block[offset+stride*5]; + const int z3= block[offset+stride*1] + block[offset+stride*5]; + + temp[4*i+0]= z0+z3; + temp[4*i+1]= z1+z2; + temp[4*i+2]= z1-z2; + temp[4*i+3]= z0-z3; + } + + for(i=0; i<4; i++){ + const int offset= x_offset[i]; + const int z0= temp[4*0+i] + temp[4*2+i]; + const int z1= temp[4*0+i] - temp[4*2+i]; + const int z2= temp[4*1+i] - temp[4*3+i]; + const int z3= temp[4*1+i] + temp[4*3+i]; + + block[stride*0 +offset]= (z0 + z3)>>1; + block[stride*2 +offset]= (z1 + z2)>>1; + block[stride*8 +offset]= (z1 - z2)>>1; + block[stride*10+offset]= (z0 - z3)>>1; + } +} +#endif + +#undef xStride +#undef stride + +static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){ + const int stride= 16*2; + const int xStride= 16; + int a,b,c,d,e; + + a= block[stride*0 + xStride*0]; + b= block[stride*0 + xStride*1]; + c= block[stride*1 + xStride*0]; + d= block[stride*1 + xStride*1]; + + e= a-b; + a= a+b; + b= c-d; + c= c+d; + + block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7; + block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7; + block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7; + block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7; +} + +#if 0 +static void chroma_dc_dct_c(DCTELEM *block){ + const int stride= 16*2; + const int xStride= 16; + int a,b,c,d,e; + + a= block[stride*0 + xStride*0]; + b= block[stride*0 + xStride*1]; + c= block[stride*1 + xStride*0]; + d= block[stride*1 + xStride*1]; + + e= a-b; + a= a+b; + b= c-d; + c= c+d; + + 
block[stride*0 + xStride*0]= (a+c); + block[stride*0 + xStride*1]= (e+b); + block[stride*1 + xStride*0]= (a-c); + block[stride*1 + xStride*1]= (e-b); +} +#endif + +/** + * gets the chroma qp. + */ +static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){ + + return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)]; +} + + +#if 0 +static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ + int i; + //FIXME try int temp instead of block + + for(i=0; i<4; i++){ + const int d0= src1[0 + i*stride] - src2[0 + i*stride]; + const int d1= src1[1 + i*stride] - src2[1 + i*stride]; + const int d2= src1[2 + i*stride] - src2[2 + i*stride]; + const int d3= src1[3 + i*stride] - src2[3 + i*stride]; + const int z0= d0 + d3; + const int z3= d0 - d3; + const int z1= d1 + d2; + const int z2= d1 - d2; + + block[0 + 4*i]= z0 + z1; + block[1 + 4*i]= 2*z3 + z2; + block[2 + 4*i]= z0 - z1; + block[3 + 4*i]= z3 - 2*z2; + } + + for(i=0; i<4; i++){ + const int z0= block[0*4 + i] + block[3*4 + i]; + const int z3= block[0*4 + i] - block[3*4 + i]; + const int z1= block[1*4 + i] + block[2*4 + i]; + const int z2= block[1*4 + i] - block[2*4 + i]; + + block[0*4 + i]= z0 + z1; + block[1*4 + i]= 2*z3 + z2; + block[2*4 + i]= z0 - z1; + block[3*4 + i]= z3 - 2*z2; + } +} +#endif + +//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close +//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away) +static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){ + int i; + const int * const quant_table= quant_coeff[qscale]; + const int bias= intra ? (1<dc_threshold2){ + if(level>0){ + level= (dc_bias + level)>>(QUANT_SHIFT-2); + block[0]= level; + }else{ + level= (dc_bias - level)>>(QUANT_SHIFT-2); + block[0]= -level; + } +// last_non_zero = i; + }else{ + block[0]=0; + } + }else{ + const int dc_bias= intra ? 
(1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6; + const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1; + const unsigned int dc_threshold2= (dc_threshold1<<1); + + int level= block[0]*quant_table[0]; + if(((unsigned)(level+dc_threshold1))>dc_threshold2){ + if(level>0){ + level= (dc_bias + level)>>(QUANT_SHIFT+1); + block[0]= level; + }else{ + level= (dc_bias - level)>>(QUANT_SHIFT+1); + block[0]= -level; + } +// last_non_zero = i; + }else{ + block[0]=0; + } + } + last_non_zero= 0; + i=1; + }else{ + last_non_zero= -1; + i=0; + } + + for(; i<16; i++){ + const int j= scantable[i]; + int level= block[j]*quant_table[j]; + +// if( bias+level >= (1<<(QMAT_SHIFT - 3)) +// || bias-level >= (1<<(QMAT_SHIFT - 3))){ + if(((unsigned)(level+threshold1))>threshold2){ + if(level>0){ + level= (bias + level)>>QUANT_SHIFT; + block[j]= level; + }else{ + level= (bias - level)>>QUANT_SHIFT; + block[j]= -level; + } + last_non_zero = i; + }else{ + block[j]=0; + } + } + + return last_non_zero; +} + +static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){ + const uint32_t a= ((uint32_t*)(src-stride))[0]; + ((uint32_t*)(src+0*stride))[0]= a; + ((uint32_t*)(src+1*stride))[0]= a; + ((uint32_t*)(src+2*stride))[0]= a; + ((uint32_t*)(src+3*stride))[0]= a; +} + +static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){ + ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101; + ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101; + ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101; + ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101; +} + +static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){ + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; + + ((uint32_t*)(src+0*stride))[0]= + ((uint32_t*)(src+1*stride))[0]= + ((uint32_t*)(src+2*stride))[0]= + 
((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; +} + +static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){ + const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; + + ((uint32_t*)(src+0*stride))[0]= + ((uint32_t*)(src+1*stride))[0]= + ((uint32_t*)(src+2*stride))[0]= + ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; +} + +static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){ + const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; + + ((uint32_t*)(src+0*stride))[0]= + ((uint32_t*)(src+1*stride))[0]= + ((uint32_t*)(src+2*stride))[0]= + ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; +} + +static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){ + ((uint32_t*)(src+0*stride))[0]= + ((uint32_t*)(src+1*stride))[0]= + ((uint32_t*)(src+2*stride))[0]= + ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U; +} + + +#define LOAD_TOP_RIGHT_EDGE\ + const int t4= topright[0];\ + const int t5= topright[1];\ + const int t6= topright[2];\ + const int t7= topright[3];\ + +#define LOAD_LEFT_EDGE\ + const int l0= src[-1+0*stride];\ + const int l1= src[-1+1*stride];\ + const int l2= src[-1+2*stride];\ + const int l3= src[-1+3*stride];\ + +#define LOAD_TOP_EDGE\ + const int t0= src[ 0-1*stride];\ + const int t1= src[ 1-1*stride];\ + const int t2= src[ 2-1*stride];\ + const int t3= src[ 3-1*stride];\ + +static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){ + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + + src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; + src[0+2*stride]= + src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; + src[0+1*stride]= + src[1+2*stride]= + src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; + src[0+0*stride]= + src[1+1*stride]= + src[2+2*stride]= + src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+0*stride]= + src[2+1*stride]= + src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+0*stride]= + src[3+1*stride]=(t0 + 
2*t1 + t2 + 2)>>2; + src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; +} + +static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE +// LOAD_LEFT_EDGE + + src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2; + src[1+0*stride]= + src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2; + src[2+0*stride]= + src[1+1*stride]= + src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2; + src[3+0*stride]= + src[2+1*stride]= + src[1+2*stride]= + src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2; + src[3+1*stride]= + src[2+2*stride]= + src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2; + src[3+2*stride]= + src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2; + src[3+3*stride]=(t6 + 3*t7 + 2)>>2; +} + +static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){ + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + const __attribute__((unused)) int unu= l3; + + src[0+0*stride]= + src[1+2*stride]=(lt + t0 + 1)>>1; + src[1+0*stride]= + src[2+2*stride]=(t0 + t1 + 1)>>1; + src[2+0*stride]= + src[3+2*stride]=(t1 + t2 + 1)>>1; + src[3+0*stride]=(t2 + t3 + 1)>>1; + src[0+1*stride]= + src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[1+1*stride]= + src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[2+1*stride]= + src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; +} + +static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){ + LOAD_TOP_EDGE + LOAD_TOP_RIGHT_EDGE + const __attribute__((unused)) int unu= t7; + + src[0+0*stride]=(t0 + t1 + 1)>>1; + src[1+0*stride]= + src[0+2*stride]=(t1 + t2 + 1)>>1; + src[2+0*stride]= + src[1+2*stride]=(t2 + t3 + 1)>>1; + src[3+0*stride]= + src[2+2*stride]=(t3 + t4+ 1)>>1; + src[3+2*stride]=(t4 + t5+ 1)>>1; + src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[1+1*stride]= + src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2; + src[2+1*stride]= + src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2; + src[3+1*stride]= + 
src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2; + src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; +} + +static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){ + LOAD_LEFT_EDGE + + src[0+0*stride]=(l0 + l1 + 1)>>1; + src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[2+0*stride]= + src[0+1*stride]=(l1 + l2 + 1)>>1; + src[3+0*stride]= + src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2; + src[2+1*stride]= + src[0+2*stride]=(l2 + l3 + 1)>>1; + src[3+1*stride]= + src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2; + src[3+2*stride]= + src[1+3*stride]= + src[0+3*stride]= + src[2+2*stride]= + src[2+3*stride]= + src[3+3*stride]=l3; +} + +static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){ + const int lt= src[-1-1*stride]; + LOAD_TOP_EDGE + LOAD_LEFT_EDGE + const __attribute__((unused)) int unu= t3; + + src[0+0*stride]= + src[2+1*stride]=(lt + l0 + 1)>>1; + src[1+0*stride]= + src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2; + src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2; + src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2; + src[0+1*stride]= + src[2+2*stride]=(l0 + l1 + 1)>>1; + src[1+1*stride]= + src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2; + src[0+2*stride]= + src[2+3*stride]=(l1 + l2+ 1)>>1; + src[1+2*stride]= + src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; + src[0+3*stride]=(l2 + l3 + 1)>>1; + src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; +} + +static void pred16x16_vertical_c(uint8_t *src, int stride){ + int i; + const uint32_t a= ((uint32_t*)(src-stride))[0]; + const uint32_t b= ((uint32_t*)(src-stride))[1]; + const uint32_t c= ((uint32_t*)(src-stride))[2]; + const uint32_t d= ((uint32_t*)(src-stride))[3]; + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= a; + ((uint32_t*)(src+i*stride))[1]= b; + ((uint32_t*)(src+i*stride))[2]= c; + ((uint32_t*)(src+i*stride))[3]= d; + } +} + +static void pred16x16_horizontal_c(uint8_t *src, int stride){ + int i; + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= + ((uint32_t*)(src+i*stride))[2]= + 
((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101; + } +} + +static void pred16x16_dc_c(uint8_t *src, int stride){ + int i, dc=0; + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + + dc= 0x01010101*((dc + 16)>>5); + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= + ((uint32_t*)(src+i*stride))[2]= + ((uint32_t*)(src+i*stride))[3]= dc; + } +} + +static void pred16x16_left_dc_c(uint8_t *src, int stride){ + int i, dc=0; + + for(i=0;i<16; i++){ + dc+= src[-1+i*stride]; + } + + dc= 0x01010101*((dc + 8)>>4); + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= + ((uint32_t*)(src+i*stride))[2]= + ((uint32_t*)(src+i*stride))[3]= dc; + } +} + +static void pred16x16_top_dc_c(uint8_t *src, int stride){ + int i, dc=0; + + for(i=0;i<16; i++){ + dc+= src[i-stride]; + } + dc= 0x01010101*((dc + 8)>>4); + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= + ((uint32_t*)(src+i*stride))[2]= + ((uint32_t*)(src+i*stride))[3]= dc; + } +} + +static void pred16x16_128_dc_c(uint8_t *src, int stride){ + int i; + + for(i=0; i<16; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= + ((uint32_t*)(src+i*stride))[2]= + ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; + } +} + +static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){ + int i, j, k; + int a; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + const uint8_t * const src0 = src+7-stride; + const uint8_t *src1 = src+8*stride-1; + const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=8; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + if(svq3){ + H = ( 5*(H/4) ) / 16; + V = ( 5*(V/4) ) / 16; + + /* required for 100% accuracy */ + i = H; H = V; V = i; + }else{ + H = ( 5*H+32 
) >> 6; + V = ( 5*V+32 ) >> 6; + } + + a = 16*(src1[0] + src2[16] + 1) - 7*(V+H); + for(j=16; j>0; --j) { + int b = a; + a += V; + for(i=-16; i<0; i+=4) { + src[16+i] = cm[ (b ) >> 5 ]; + src[17+i] = cm[ (b+ H) >> 5 ]; + src[18+i] = cm[ (b+2*H) >> 5 ]; + src[19+i] = cm[ (b+3*H) >> 5 ]; + b += 4*H; + } + src += stride; + } +} + +static void pred16x16_plane_c(uint8_t *src, int stride){ + pred16x16_plane_compat_c(src, stride, 0); +} + +static void pred8x8_vertical_c(uint8_t *src, int stride){ + int i; + const uint32_t a= ((uint32_t*)(src-stride))[0]; + const uint32_t b= ((uint32_t*)(src-stride))[1]; + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= a; + ((uint32_t*)(src+i*stride))[1]= b; + } +} + +static void pred8x8_horizontal_c(uint8_t *src, int stride){ + int i; + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101; + } +} + +static void pred8x8_128_dc_c(uint8_t *src, int stride){ + int i; + + for(i=0; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; + } +} + +static void pred8x8_left_dc_c(uint8_t *src, int stride){ + int i; + int dc0, dc2; + + dc0=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc0= 0x01010101*((dc0 + 2)>>2); + dc2= 0x01010101*((dc2 + 2)>>2); + + for(i=0; i<4; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc0; + } + for(i=4; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= + ((uint32_t*)(src+i*stride))[1]= dc2; + } +} + +static void pred8x8_top_dc_c(uint8_t *src, int stride){ + int i; + int dc0, dc1; + + dc0=dc1=0; + for(i=0;i<4; i++){ + dc0+= src[i-stride]; + dc1+= src[4+i-stride]; + } + dc0= 0x01010101*((dc0 + 2)>>2); + dc1= 0x01010101*((dc1 + 2)>>2); + + for(i=0; i<4; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc1; + } + for(i=4; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc1; + } +} 
+ + +static void pred8x8_dc_c(uint8_t *src, int stride){ + int i; + int dc0, dc1, dc2, dc3; + + dc0=dc1=dc2=0; + for(i=0;i<4; i++){ + dc0+= src[-1+i*stride] + src[i-stride]; + dc1+= src[4+i-stride]; + dc2+= src[-1+(i+4)*stride]; + } + dc3= 0x01010101*((dc1 + dc2 + 4)>>3); + dc0= 0x01010101*((dc0 + 4)>>3); + dc1= 0x01010101*((dc1 + 2)>>2); + dc2= 0x01010101*((dc2 + 2)>>2); + + for(i=0; i<4; i++){ + ((uint32_t*)(src+i*stride))[0]= dc0; + ((uint32_t*)(src+i*stride))[1]= dc1; + } + for(i=4; i<8; i++){ + ((uint32_t*)(src+i*stride))[0]= dc2; + ((uint32_t*)(src+i*stride))[1]= dc3; + } +} + +static void pred8x8_plane_c(uint8_t *src, int stride){ + int j, k; + int a; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + const uint8_t * const src0 = src+3-stride; + const uint8_t *src1 = src+4*stride-1; + const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; + int H = src0[1] - src0[-1]; + int V = src1[0] - src2[ 0]; + for(k=2; k<=4; ++k) { + src1 += stride; src2 -= stride; + H += k*(src0[k] - src0[-k]); + V += k*(src1[0] - src2[ 0]); + } + H = ( 17*H+16 ) >> 5; + V = ( 17*V+16 ) >> 5; + + a = 16*(src1[0] + src2[8]+1) - 3*(V+H); + for(j=8; j>0; --j) { + int b = a; + a += V; + src[0] = cm[ (b ) >> 5 ]; + src[1] = cm[ (b+ H) >> 5 ]; + src[2] = cm[ (b+2*H) >> 5 ]; + src[3] = cm[ (b+3*H) >> 5 ]; + src[4] = cm[ (b+4*H) >> 5 ]; + src[5] = cm[ (b+5*H) >> 5 ]; + src[6] = cm[ (b+6*H) >> 5 ]; + src[7] = cm[ (b+7*H) >> 5 ]; + src += stride; + } +} + +#define SRC(x,y) src[(x)+(y)*stride] +#define PL(y) \ + const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; +#define PREDICT_8x8_LOAD_LEFT \ + const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \ + + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ + PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ + const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2 + +#define PT(x) \ + const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOP \ + const int t0 = ((has_topleft ? 
SRC(-1,-1) : SRC(0,-1)) \ + + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ + PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ + const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ + + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2 + +#define PTR(x) \ + t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOPRIGHT \ + int t8, t9, t10, t11, t12, t13, t14, t15; \ + if(has_topright) { \ + PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ + t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ + } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); + +#define PREDICT_8x8_LOAD_TOPLEFT \ + const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2 + +#define PREDICT_8x8_DC(v) \ + int y; \ + for( y = 0; y < 8; y++ ) { \ + ((uint32_t*)src)[0] = \ + ((uint32_t*)src)[1] = v; \ + src += stride; \ + } + +static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_DC(0x80808080); +} +static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOP; + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7 + +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; +#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\ + ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y + ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); +#undef ROW +} +static void pred8x8l_vertical_c(uint8_t *src, 
int has_topleft, int has_topright, int stride) +{ + int y; + PREDICT_8x8_LOAD_TOP; + src[0] = t0; + src[1] = t1; + src[2] = t2; + src[3] = t3; + src[4] = t4; + src[5] = t5; + src[6] = t6; + src[7] = t7; + for( y = 1; y < 8; y++ ) + *(uint64_t*)(src+y*stride) = *(uint64_t*)src; +} +static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; + SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; + SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; + SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; + SRC(7,7)= (t14 + 3*t15 + 2) >> 2; +} +static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; + SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 
2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; + +} +static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; + SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; + SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; + SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; + SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; + SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; + SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; + SRC(7,1)= (t5 + 2*t6 + t7 
+ 2) >> 2; + SRC(7,0)= (t6 + t7 + 1) >> 1; +} +static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l6 + l7 + 1) >> 1; + SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; + SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; + SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; + SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; + SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; + SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; + SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; + SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; + SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; + SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; + SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; + SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; + SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; + SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; +} +static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + t1 + 1) >> 1; + SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; + SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; + SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; + SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 
+ t5 + 1) >> 1; + SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; + SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; + SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; + SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; + SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; + SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(7,6)= (t10 + t11 + 1) >> 1; + SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; +} +static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + SRC(0,0)= (l0 + l1 + 1) >> 1; + SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; + SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; + SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; + SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; + SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; + SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; + SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; + SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= + SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= + SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= + SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; +} +#undef PREDICT_8x8_LOAD_LEFT +#undef PREDICT_8x8_LOAD_TOP +#undef PREDICT_8x8_LOAD_TOPLEFT +#undef PREDICT_8x8_LOAD_TOPRIGHT +#undef PREDICT_8x8_DC +#undef PTR +#undef PT 
+#undef PL +#undef SRC + +static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int src_x_offset, int src_y_offset, + qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ + MpegEncContext * const s = &h->s; + const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; + const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; + const int luma_xy= (mx&3) + ((my&3)<<2); + uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize; + uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize; + uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize; + int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it + int extra_height= extra_width; + int emu=0; + const int full_mx= mx>>2; + const int full_my= my>>2; + const int pic_width = 16*s->mb_width; + const int pic_height = 16*s->mb_height; + + if(!pic->data[0]) + return; + + if(mx&7) extra_width -= 3; + if(my&7) extra_height -= 3; + + if( full_mx < 0-extra_width + || full_my < 0-extra_height + || full_mx + 16/*FIXME*/ > pic_width + extra_width + || full_my + 16/*FIXME*/ > pic_height + extra_height){ + ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_y= s->edge_emu_buffer + 2 + 2*s->linesize; + emu=1; + } + + qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps? 
+ if(!square){ + qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize); + } + + if(s->flags&CODEC_FLAG_GRAY) return; + + if(emu){ + ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + src_cb= s->edge_emu_buffer; + } + chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7); + + if(emu){ + ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + src_cr= s->edge_emu_buffer; + } + chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7); +} + +static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, + int list0, int list1){ + MpegEncContext * const s = &h->s; + qpel_mc_func *qpix_op= qpix_put; + h264_chroma_mc_func chroma_op= chroma_put; + + dest_y += 2*x_offset + 2*y_offset*s-> linesize; + dest_cb += x_offset + y_offset*s->uvlinesize; + dest_cr += x_offset + y_offset*s->uvlinesize; + x_offset += 8*s->mb_x; + y_offset += 8*s->mb_y; + + if(list0){ + Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; + mc_dir_part(h, ref, n, square, chroma_height, delta, 0, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_op, chroma_op); + + qpix_op= qpix_avg; + chroma_op= chroma_avg; + } + + if(list1){ + Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ]; + mc_dir_part(h, ref, n, square, chroma_height, delta, 1, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_op, chroma_op); + } +} + +static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + 
h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, + h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, + int list0, int list1){ + MpegEncContext * const s = &h->s; + + dest_y += 2*x_offset + 2*y_offset*s-> linesize; + dest_cb += x_offset + y_offset*s->uvlinesize; + dest_cr += x_offset + y_offset*s->uvlinesize; + x_offset += 8*s->mb_x; + y_offset += 8*s->mb_y; + + if(list0 && list1){ + /* don't optimize for luma-only case, since B-frames usually + * use implicit weights => chroma too. */ + uint8_t *tmp_cb = s->obmc_scratchpad; + uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize; + uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize; + int refn0 = h->ref_cache[0][ scan8[n] ]; + int refn1 = h->ref_cache[1][ scan8[n] ]; + + mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, + dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put); + mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, + tmp_y, tmp_cb, tmp_cr, + x_offset, y_offset, qpix_put, chroma_put); + + if(h->use_weight == 2){ + int weight0 = h->implicit_weight[refn0][refn1]; + int weight1 = 64 - weight0; + luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0); + }else{ + luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom, + h->luma_weight[0][refn0], h->luma_weight[1][refn1], + h->luma_offset[0][refn0] + h->luma_offset[1][refn1]); + chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0], + h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]); + chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1], + h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]); + 
} + }else{ + int list = list1 ? 1 : 0; + int refn = h->ref_cache[list][ scan8[n] ]; + Picture *ref= &h->ref_list[list][refn]; + mc_dir_part(h, ref, n, square, chroma_height, delta, list, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put, chroma_put); + + luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom, + h->luma_weight[list][refn], h->luma_offset[list][refn]); + if(h->use_weight_chroma){ + chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]); + chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]); + } + } +} + +static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, + h264_weight_func *weight_op, h264_biweight_func *weight_avg, + int list0, int list1){ + if((h->use_weight==2 && list0 && list1 + && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) + || h->use_weight==1) + mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, + weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); + else + mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); +} + +static inline void prefetch_motion(H264Context *h, int list){ + /* fetch pixels for estimated mv 4 macroblocks ahead + * optimized for 64byte cache lines */ + MpegEncContext * const s = &h->s; + const int refn = h->ref_cache[list][scan8[0]]; + if(refn >= 0){ + const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; + const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 
16*s->mb_y; + uint8_t **src= h->ref_list[list][refn].data; + int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; + s->dsp.prefetch(src[0]+off, s->linesize, 4); + off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; + s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); + } +} + +static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), + qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), + h264_weight_func *weight_op, h264_biweight_func *weight_avg){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + + assert(IS_INTER(mb_type)); + + prefetch_motion(h, 0); + + if(IS_16X16(mb_type)){ + mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], + &weight_op[0], &weight_avg[0], + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); + }else if(IS_16X8(mb_type)){ + mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); + mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, + qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); + }else if(IS_8X16(mb_type)){ + mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], + IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); + mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], + IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); + }else{ + int i; + + assert(IS_8X8(mb_type)); + + for(i=0; i<4; i++){ + const int sub_mb_type= h->sub_mb_type[i]; + const int n= 
4*i; + int x_offset= (i&1)<<2; + int y_offset= (i&2)<<1; + + if(IS_SUB_8X8(sub_mb_type)){ + mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[3], &weight_avg[3], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + }else if(IS_SUB_8X4(sub_mb_type)){ + mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, + qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + }else if(IS_SUB_4X8(sub_mb_type)){ + mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], &weight_avg[5], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], &weight_avg[5], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + }else{ + int j; + assert(IS_SUB_4X4(sub_mb_type)); + for(j=0; j<4; j++){ + int sub_x_offset= x_offset + 2*(j&1); + int sub_y_offset= y_offset + (j&2); + mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, + qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[6], &weight_avg[6], + IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); + } + } + } + } + + prefetch_motion(h, 1); +} + +static void decode_init_vlc(H264Context *h){ + static int done = 0; + + if (!done) { + int i; + done = 1; + + init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, + &chroma_dc_coeff_token_len [0], 1, 1, + &chroma_dc_coeff_token_bits[0], 1, 1, 1); + + 
for(i=0; i<4; i++){ + init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, + &coeff_token_len [i][0], 1, 1, + &coeff_token_bits[i][0], 1, 1, 1); + } + + for(i=0; i<3; i++){ + init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, + &chroma_dc_total_zeros_len [i][0], 1, 1, + &chroma_dc_total_zeros_bits[i][0], 1, 1, 1); + } + for(i=0; i<15; i++){ + init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16, + &total_zeros_len [i][0], 1, 1, + &total_zeros_bits[i][0], 1, 1, 1); + } + + for(i=0; i<6; i++){ + init_vlc(&run_vlc[i], RUN_VLC_BITS, 7, + &run_len [i][0], 1, 1, + &run_bits[i][0], 1, 1, 1); + } + init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, + &run_len [6][0], 1, 1, + &run_bits[6][0], 1, 1, 1); + } +} + +/** + * Sets the intra prediction function pointers. + */ +static void init_pred_ptrs(H264Context *h){ +// MpegEncContext * const s = &h->s; + + h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; + h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; + h->pred4x4[DC_PRED ]= pred4x4_dc_c; + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c; + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; + h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; + h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; + h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c; + h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c; + h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c; + h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; + h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; + + h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c; + h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c; + h->pred8x8l[DC_PRED ]= pred8x8l_dc_c; + h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c; + h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c; + h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c; + h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c; + h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c; + h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c; + 
h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c; + h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; + h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; + + h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; + h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; + h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; + h->pred8x8[PLANE_PRED8x8 ]= pred8x8_plane_c; + h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; + h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; + h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; + + h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; + h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; + h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; + h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; + h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; + h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; + h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; +} + +static void free_tables(H264Context *h){ + av_freep(&h->intra4x4_pred_mode); + av_freep(&h->chroma_pred_mode_table); + av_freep(&h->cbp_table); + av_freep(&h->mvd_table[0]); + av_freep(&h->mvd_table[1]); + av_freep(&h->direct_table); + av_freep(&h->non_zero_count); + av_freep(&h->slice_table_base); + av_freep(&h->top_borders[1]); + av_freep(&h->top_borders[0]); + h->slice_table= NULL; + + av_freep(&h->mb2b_xy); + av_freep(&h->mb2b8_xy); + + av_freep(&h->s.obmc_scratchpad); +} + +static void init_dequant8_coeff_table(H264Context *h){ + int i,q,x; + const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly + h->dequant8_coeff[0] = h->dequant8_buffer[0]; + h->dequant8_coeff[1] = h->dequant8_buffer[1]; + + for(i=0; i<2; i++ ){ + if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){ + h->dequant8_coeff[1] = h->dequant8_buffer[0]; + break; + } + + for(q=0; q<52; q++){ + int shift = div6[q]; + int idx = rem6[q]; + for(x=0; x<64; x++) + h->dequant8_coeff[i][q][transpose ? 
(x>>3)|((x&7)<<3) : x] = + ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * + h->pps.scaling_matrix8[i][x]) << shift; + } + } +} + +static void init_dequant4_coeff_table(H264Context *h){ + int i,j,q,x; + const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly + for(i=0; i<6; i++ ){ + h->dequant4_coeff[i] = h->dequant4_buffer[i]; + for(j=0; jpps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){ + h->dequant4_coeff[i] = h->dequant4_buffer[j]; + break; + } + } + if(jdequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] = + ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] * + h->pps.scaling_matrix4[i][x]) << shift; + } + } +} + +static void init_dequant_tables(H264Context *h){ + int i,x; + init_dequant4_coeff_table(h); + if(h->pps.transform_8x8_mode) + init_dequant8_coeff_table(h); + if(h->sps.transform_bypass){ + for(i=0; i<6; i++) + for(x=0; x<16; x++) + h->dequant4_coeff[i][0][x] = 1<<6; + if(h->pps.transform_8x8_mode) + for(i=0; i<2; i++) + for(x=0; x<64; x++) + h->dequant8_coeff[i][0][x] = 1<<6; + } +} + + +/** + * allocates tables. 
+ * needs width/height + */ +static int alloc_tables(H264Context *h){ + MpegEncContext * const s = &h->s; + const int big_mb_num= s->mb_stride * (s->mb_height+1); + int x,y; + + CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) + + CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) + + if( h->pps.cabac ) { + CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); + CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t)); + CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t)); + } + + memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t)); + h->slice_table= h->slice_table_base + s->mb_stride + 1; + + CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t)); + CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t)); + for(y=0; ymb_height; y++){ + for(x=0; xmb_width; x++){ + const int mb_xy= x + y*s->mb_stride; + const int b_xy = 4*x + 4*y*h->b_stride; + const int b8_xy= 2*x + 2*y*h->b8_stride; + + h->mb2b_xy [mb_xy]= b_xy; + h->mb2b8_xy[mb_xy]= b8_xy; + } + } + + s->obmc_scratchpad = NULL; + + if(!h->dequant4_coeff[0]) + init_dequant_tables(h); + + return 0; +fail: + free_tables(h); + return -1; +} + +static void common_init(H264Context *h){ + MpegEncContext * const s = &h->s; + + s->width = s->avctx->width; + s->height = s->avctx->height; + s->codec_id= s->avctx->codec->id; + + init_pred_ptrs(h); + + h->dequant_coeff_pps= -1; + s->unrestricted_mv=1; + s->decode=1; //FIXME + + memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t)); + memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t)); +} + +static int 
decode_init(AVCodecContext *avctx){ + H264Context *h= avctx->priv_data; + MpegEncContext * const s = &h->s; + + MPV_decode_defaults(s); + + s->avctx = avctx; + common_init(h); + + s->out_format = FMT_H264; + s->workaround_bugs= avctx->workaround_bugs; + + // set defaults +// s->decode_mb= ff_h263_decode_mb; + s->low_delay= 1; + avctx->pix_fmt= PIX_FMT_YUV420P; + + decode_init_vlc(h); + + if(avctx->extradata_size > 0 && avctx->extradata && + *(char *)avctx->extradata == 1){ + h->is_avc = 1; + h->got_avcC = 0; + } else { + h->is_avc = 0; + } + + return 0; +} + +static int frame_start(H264Context *h){ + MpegEncContext * const s = &h->s; + int i; + + if(MPV_frame_start(s, s->avctx) < 0) + return -1; + ff_er_frame_start(s); + + assert(s->linesize && s->uvlinesize); + + for(i=0; i<16; i++){ + h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); + } + for(i=0; i<4; i++){ + h->block_offset[16+i]= + h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+16+i]= + h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); + } + + /* can't be in alloc_tables because linesize isn't known there. + * FIXME: redo bipred weight to not require extra buffer? 
*/ + if(!s->obmc_scratchpad) + s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize); + +// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; + return 0; +} + +static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ + MpegEncContext * const s = &h->s; + int i; + + src_y -= linesize; + src_cb -= uvlinesize; + src_cr -= uvlinesize; + + // There are two lines saved, the line above the the top macroblock of a pair, + // and the line above the bottom macroblock + h->left_border[0]= h->top_borders[0][s->mb_x][15]; + for(i=1; i<17; i++){ + h->left_border[i]= src_y[15+i* linesize]; + } + + *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7]; + h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7]; + for(i=1; i<9; i++){ + h->left_border[i+17 ]= src_cb[7+i*uvlinesize]; + h->left_border[i+17+9]= src_cr[7+i*uvlinesize]; + } + *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize); + } +} + +static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ + MpegEncContext * const s = &h->s; + int temp8, i; + uint64_t temp64; + int deblock_left = (s->mb_x > 0); + int deblock_top = (s->mb_y > 0); + + src_y -= linesize + 1; + src_cb -= uvlinesize + 1; + src_cr -= uvlinesize + 1; + +#define XCHG(a,b,t,xchg)\ +t= a;\ +if(xchg)\ + a= b;\ +b= t; + + if(deblock_left){ + for(i = !deblock_top; i<17; i++){ + XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg); + } + } + + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), 
*(uint64_t*)(src_y +1), temp64, xchg); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); + if(s->mb_x+1 < s->mb_width){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); + } + } + + if(!(s->flags&CODEC_FLAG_GRAY)){ + if(deblock_left){ + for(i = !deblock_top; i<9; i++){ + XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg); + XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg); + } + } + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); + } + } +} + +static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ + MpegEncContext * const s = &h->s; + int i; + + src_y -= 2 * linesize; + src_cb -= 2 * uvlinesize; + src_cr -= 2 * uvlinesize; + + // There are two lines saved, the line above the the top macroblock of a pair, + // and the line above the bottom macroblock + h->left_border[0]= h->top_borders[0][s->mb_x][15]; + h->left_border[1]= h->top_borders[1][s->mb_x][15]; + for(i=2; i<34; i++){ + h->left_border[i]= src_y[15+i* linesize]; + } + + *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7]; + h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7]; + h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7]; + h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7]; + for(i=2; i<18; i++){ + h->left_border[i+34 ]= src_cb[7+i*uvlinesize]; + h->left_border[i+34+18]= src_cr[7+i*uvlinesize]; + } + 
*(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize); + } +} + +static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ + MpegEncContext * const s = &h->s; + int temp8, i; + uint64_t temp64; + int deblock_left = (s->mb_x > 0); + int deblock_top = (s->mb_y > 0); + + tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize); + + src_y -= 2 * linesize + 1; + src_cb -= 2 * uvlinesize + 1; + src_cr -= 2 * uvlinesize + 1; + +#define XCHG(a,b,t,xchg)\ +t= a;\ +if(xchg)\ + a= b;\ +b= t; + + if(deblock_left){ + for(i = (!deblock_top)<<1; i<34; i++){ + XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg); + } + } + + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1); + } + + if(!(s->flags&CODEC_FLAG_GRAY)){ + if(deblock_left){ + for(i = (!deblock_top) << 1; i<18; i++){ + XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg); + XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg); + } + } + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1); + 
XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1); + } + } +} + +static void hl_decode_mb(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_x= s->mb_x; + const int mb_y= s->mb_y; + const int mb_xy= mb_x + mb_y*s->mb_stride; + const int mb_type= s->current_picture.mb_type[mb_xy]; + uint8_t *dest_y, *dest_cb, *dest_cr; + int linesize, uvlinesize /*dct_offset*/; + int i; + int *block_offset = &h->block_offset[0]; + const unsigned int bottom = mb_y & 1; + const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass); + void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + + if(!s->decode) + return; + + dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16; + dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + + if (h->mb_field_decoding_flag) { + linesize = s->linesize * 2; + uvlinesize = s->uvlinesize * 2; + block_offset = &h->block_offset[24]; + if(mb_y&1){ //FIXME move out of this func? + dest_y -= s->linesize*15; + dest_cb-= s->uvlinesize*7; + dest_cr-= s->uvlinesize*7; + } + } else { + linesize = s->linesize; + uvlinesize = s->uvlinesize; +// dct_offset = s->linesize * 16; + } + + if(transform_bypass){ + idct_dc_add = + idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; + }else if(IS_8x8DCT(mb_type)){ + idct_dc_add = s->dsp.h264_idct8_dc_add; + idct_add = s->dsp.h264_idct8_add; + }else{ + idct_dc_add = s->dsp.h264_idct_dc_add; + idct_add = s->dsp.h264_idct_add; + } + + if (IS_INTRA_PCM(mb_type)) { + unsigned int x, y; + + // The pixels are stored in h->mb array in the same order as levels, + // copy them in output in the correct order. 
+ for(i=0; i<16; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x]; + } + } + } + for(i=16; i<16+4; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x]; + } + } + } + for(i=20; i<20+4; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x]; + } + } + } + } else { + if(IS_INTRA(mb_type)){ + if(h->deblocking_filter) { + if (h->mb_aff_frame) { + if (!bottom) + xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1); + } else { + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); + } + } + + if(!(s->flags&CODEC_FLAG_GRAY)){ + h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); + h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); + } + + if(IS_INTRA4x4(mb_type)){ + if(!s->encoding){ + if(IS_8x8DCT(mb_type)){ + for(i=0; i<16; i+=4){ + uint8_t * const ptr= dest_y + block_offset[i]; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + const int nnz = h->non_zero_count_cache[ scan8[i] ]; + h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<topright_samples_available<<(i+1))&0x8000, linesize); + if(nnz){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + } + } + }else + for(i=0; i<16; i++){ + uint8_t * const ptr= dest_y + block_offset[i]; + uint8_t *topright; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + int nnz, tr; + + if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ + const int topright_avail= (h->topright_samples_available<pred4x4[ dir ](ptr, topright, linesize); + nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(s->codec_id == CODEC_ID_H264){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + }else + svq3_add_idct_c(ptr, h->mb 
+ i*16, linesize, s->qscale, 0); + } + } + } + }else{ + h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); + if(s->codec_id == CODEC_ID_H264){ + if(!transform_bypass) + h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]); + }else + svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); + } + if(h->deblocking_filter) { + if (h->mb_aff_frame) { + if (bottom) { + uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16; + uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; + uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; + s->mb_y--; + xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0); + s->mb_y++; + } + } else { + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); + } + } + }else if(s->codec_id == CODEC_ID_H264){ + hl_motion(h, dest_y, dest_cb, dest_cr, + s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, + s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab, + s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); + } + + + if(!IS_INTRA4x4(mb_type)){ + if(s->codec_id == CODEC_ID_H264){ + if(IS_INTRA16x16(mb_type)){ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else if(h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } + }else{ + const int di = IS_8x8DCT(mb_type) ? 
4 : 1; + for(i=0; i<16; i+=di){ + int nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(nnz==1 && h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } + } + } + }else{ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + block_offset[i]; + svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); + } + } + } + } + + if(!(s->flags&CODEC_FLAG_GRAY)){ + uint8_t *dest[2] = {dest_cb, dest_cr}; + if(transform_bypass){ + idct_add = idct_dc_add = s->dsp.add_pixels4; + }else{ + idct_add = s->dsp.h264_idct_add; + idct_dc_add = s->dsp.h264_idct_dc_add; + chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]); + chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]); + } + if(s->codec_id == CODEC_ID_H264){ + for(i=16; i<16+8; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); + else if(h->mb[i*16]) + idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); + } + }else{ + for(i=16; i<16+8; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; + svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } + } + } + } + } + if(h->deblocking_filter) { + if (h->mb_aff_frame) { + const int mb_y = s->mb_y - 1; + uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr; + const int mb_xy= mb_x + mb_y*s->mb_stride; + const int mb_type_top = s->current_picture.mb_type[mb_xy]; + const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride]; + uint8_t tmp = s->current_picture.data[1][384]; + if (!bottom) return; + pair_dest_y = s->current_picture.data[0] + 
(mb_y * 16* s->linesize ) + mb_x * 16; + pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + + backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize); + // TODO deblock a pair + // top + s->mb_y--; + tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y); + fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb + filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize); + if (tmp != s->current_picture.data[1][384]) { + tprintf("modified pixel 8,1 (1)\n"); + } + // bottom + s->mb_y++; + tprintf("call mbaff filter_mb\n"); + fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb + filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + if (tmp != s->current_picture.data[1][384]) { + tprintf("modified pixel 8,1 (2)\n"); + } + } else { + tprintf("call filter_mb\n"); + backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb + filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + } + } +} + +/** + * fills the default_ref_list. 
+ */ +static int fill_default_ref_list(H264Context *h){ + MpegEncContext * const s = &h->s; + int i; + int smallest_poc_greater_than_current = -1; + Picture sorted_short_ref[32]; + + if(h->slice_type==B_TYPE){ + int out_i; + int limit= INT_MIN; + + /* sort frame according to poc in B slice */ + for(out_i=0; out_ishort_ref_count; out_i++){ + int best_i=INT_MIN; + int best_poc=INT_MAX; + + for(i=0; ishort_ref_count; i++){ + const int poc= h->short_ref[i]->poc; + if(poc > limit && poc < best_poc){ + best_poc= poc; + best_i= i; + } + } + + assert(best_i != INT_MIN); + + limit= best_poc; + sorted_short_ref[out_i]= *h->short_ref[best_i]; + tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num); + if (-1 == smallest_poc_greater_than_current) { + if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) { + smallest_poc_greater_than_current = out_i; + } + } + } + } + + if(s->picture_structure == PICT_FRAME){ + if(h->slice_type==B_TYPE){ + int list; + tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current); + + // find the largest poc + for(list=0; list<2; list++){ + int index = 0; + int j= -99; + int step= list ? -1 : 1; + + for(i=0; ishort_ref_count && index < h->ref_count[list]; i++, j+=step) { + while(j<0 || j>= h->short_ref_count){ + if(j != -99 && step == (list ? 
-1 : 1)) + return -1; + step = -step; + j= smallest_poc_greater_than_current + (step>>1); + } + if(sorted_short_ref[j].reference != 3) continue; + h->default_ref_list[list][index ]= sorted_short_ref[j]; + h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num; + } + + for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){ + if(h->long_ref[i] == NULL) continue; + if(h->long_ref[i]->reference != 3) continue; + + h->default_ref_list[ list ][index ]= *h->long_ref[i]; + h->default_ref_list[ list ][index++].pic_id= i;; + } + + if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){ + // swap the two first elements of L1 when + // L0 and L1 are identical + Picture temp= h->default_ref_list[1][0]; + h->default_ref_list[1][0] = h->default_ref_list[1][1]; + h->default_ref_list[1][1] = temp; + } + + if(index < h->ref_count[ list ]) + memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index)); + } + }else{ + int index=0; + for(i=0; ishort_ref_count; i++){ + if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit + h->default_ref_list[0][index ]= *h->short_ref[i]; + h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num; + } + for(i = 0; i < 16; i++){ + if(h->long_ref[i] == NULL) continue; + if(h->long_ref[i]->reference != 3) continue; + h->default_ref_list[0][index ]= *h->long_ref[i]; + h->default_ref_list[0][index++].pic_id= i;; + } + if(index < h->ref_count[0]) + memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index)); + } + }else{ //FIELD + if(h->slice_type==B_TYPE){ + }else{ + //FIXME second field balh + } + } +#ifdef TRACE + for (i=0; iref_count[0]; i++) { + tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? 
"LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]); + } + if(h->slice_type==B_TYPE){ + for (i=0; iref_count[1]; i++) { + tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]); + } + } +#endif + return 0; +} + +static void print_short_term(H264Context *h); +static void print_long_term(H264Context *h); + +static int decode_ref_pic_list_reordering(H264Context *h){ + MpegEncContext * const s = &h->s; + int list, index; + + print_short_term(h); + print_long_term(h); + if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func + + for(list=0; list<2; list++){ + memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]); + + if(get_bits1(&s->gb)){ + int pred= h->curr_pic_num; + + for(index=0; ; index++){ + int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb); + int pic_id; + int i; + Picture *ref = NULL; + + if(reordering_of_pic_nums_idc==3) + break; + + if(index >= h->ref_count[list]){ + av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n"); + return -1; + } + + if(reordering_of_pic_nums_idc<3){ + if(reordering_of_pic_nums_idc<2){ + const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1; + + if(abs_diff_pic_num >= h->max_pic_num){ + av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n"); + return -1; + } + + if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num; + else pred+= abs_diff_pic_num; + pred &= h->max_pic_num - 1; + + for(i= h->short_ref_count-1; i>=0; i--){ + ref = h->short_ref[i]; + assert(ref->reference == 3); + assert(!ref->long_ref); + if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer + break; + } + if(i>=0) + ref->pic_id= ref->frame_num; + }else{ + pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx + ref = h->long_ref[pic_id]; + ref->pic_id= pic_id; + 
assert(ref->reference == 3); + assert(ref->long_ref); + i=0; + } + + if (i < 0) { + av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n"); + memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME + } else { + for(i=index; i+1ref_count[list]; i++){ + if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id) + break; + } + for(; i > index; i--){ + h->ref_list[list][i]= h->ref_list[list][i-1]; + } + h->ref_list[list][index]= *ref; + } + }else{ + av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n"); + return -1; + } + } + } + + if(h->slice_type!=B_TYPE) break; + } + for(list=0; list<2; list++){ + for(index= 0; index < h->ref_count[list]; index++){ + if(!h->ref_list[list][index].data[0]) + h->ref_list[list][index]= s->current_picture; + } + if(h->slice_type!=B_TYPE) break; + } + + if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred) + direct_dist_scale_factor(h); + direct_ref_list_init(h); + return 0; +} + +static int pred_weight_table(H264Context *h){ + MpegEncContext * const s = &h->s; + int list, i; + int luma_def, chroma_def; + + h->use_weight= 0; + h->use_weight_chroma= 0; + h->luma_log2_weight_denom= get_ue_golomb(&s->gb); + h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); + luma_def = 1<luma_log2_weight_denom; + chroma_def = 1<chroma_log2_weight_denom; + + for(list=0; list<2; list++){ + for(i=0; iref_count[list]; i++){ + int luma_weight_flag, chroma_weight_flag; + + luma_weight_flag= get_bits1(&s->gb); + if(luma_weight_flag){ + h->luma_weight[list][i]= get_se_golomb(&s->gb); + h->luma_offset[list][i]= get_se_golomb(&s->gb); + if( h->luma_weight[list][i] != luma_def + || h->luma_offset[list][i] != 0) + h->use_weight= 1; + }else{ + h->luma_weight[list][i]= luma_def; + h->luma_offset[list][i]= 0; + } + + chroma_weight_flag= get_bits1(&s->gb); + if(chroma_weight_flag){ + int j; + for(j=0; j<2; j++){ + h->chroma_weight[list][i][j]= get_se_golomb(&s->gb); + 
h->chroma_offset[list][i][j]= get_se_golomb(&s->gb); + if( h->chroma_weight[list][i][j] != chroma_def + || h->chroma_offset[list][i][j] != 0) + h->use_weight_chroma= 1; + } + }else{ + int j; + for(j=0; j<2; j++){ + h->chroma_weight[list][i][j]= chroma_def; + h->chroma_offset[list][i][j]= 0; + } + } + } + if(h->slice_type != B_TYPE) break; + } + h->use_weight= h->use_weight || h->use_weight_chroma; + return 0; +} + +static void implicit_weight_table(H264Context *h){ + MpegEncContext * const s = &h->s; + int ref0, ref1; + int cur_poc = s->current_picture_ptr->poc; + + if( h->ref_count[0] == 1 && h->ref_count[1] == 1 + && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ + h->use_weight= 0; + h->use_weight_chroma= 0; + return; + } + + h->use_weight= 2; + h->use_weight_chroma= 2; + h->luma_log2_weight_denom= 5; + h->chroma_log2_weight_denom= 5; + + /* FIXME: MBAFF */ + for(ref0=0; ref0 < h->ref_count[0]; ref0++){ + int poc0 = h->ref_list[0][ref0].poc; + for(ref1=0; ref1 < h->ref_count[1]; ref1++){ + int poc1 = h->ref_list[1][ref1].poc; + int td = clip(poc1 - poc0, -128, 127); + if(td){ + int tb = clip(cur_poc - poc0, -128, 127); + int tx = (16384 + (ABS(td) >> 1)) / td; + int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2; + if(dist_scale_factor < -64 || dist_scale_factor > 128) + h->implicit_weight[ref0][ref1] = 32; + else + h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor; + }else + h->implicit_weight[ref0][ref1] = 32; + } + } +} + +static inline void unreference_pic(H264Context *h, Picture *pic){ + int i; + pic->reference=0; + if(pic == h->delayed_output_pic) + pic->reference=1; + else{ + for(i = 0; h->delayed_pic[i]; i++) + if(pic == h->delayed_pic[i]){ + pic->reference=1; + break; + } + } +} + +/** + * instantaneous decoder refresh. 
+ */ +static void idr(H264Context *h){ + int i; + + for(i=0; i<16; i++){ + if (h->long_ref[i] != NULL) { + unreference_pic(h, h->long_ref[i]); + h->long_ref[i]= NULL; + } + } + h->long_ref_count=0; + + for(i=0; ishort_ref_count; i++){ + unreference_pic(h, h->short_ref[i]); + h->short_ref[i]= NULL; + } + h->short_ref_count=0; +} + +/* forget old pics after a seek */ +static void flush_dpb(AVCodecContext *avctx){ + H264Context *h= avctx->priv_data; + int i; + for(i=0; i<16; i++) { + if(h->delayed_pic[i]) + h->delayed_pic[i]->reference= 0; + h->delayed_pic[i]= NULL; + } + if(h->delayed_output_pic) + h->delayed_output_pic->reference= 0; + h->delayed_output_pic= NULL; + idr(h); + if(h->s.current_picture_ptr) + h->s.current_picture_ptr->reference= 0; +} + +/** + * + * @return the removed picture or NULL if an error occurs + */ +static Picture * remove_short(H264Context *h, int frame_num){ + MpegEncContext * const s = &h->s; + int i; + + if(s->avctx->debug&FF_DEBUG_MMCO) + av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count); + + for(i=0; ishort_ref_count; i++){ + Picture *pic= h->short_ref[i]; + if(s->avctx->debug&FF_DEBUG_MMCO) + av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic); + if(pic->frame_num == frame_num){ + h->short_ref[i]= NULL; + memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*)); + h->short_ref_count--; + return pic; + } + } + return NULL; +} + +/** + * + * @return the removed picture or NULL if an error occurs + */ +static Picture * remove_long(H264Context *h, int i){ + Picture *pic; + + pic= h->long_ref[i]; + h->long_ref[i]= NULL; + if(pic) h->long_ref_count--; + + return pic; +} + +/** + * print short term list + */ +static void print_short_term(H264Context *h) { + uint32_t i; + if(h->s.avctx->debug&FF_DEBUG_MMCO) { + av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n"); + for(i=0; ishort_ref_count; i++){ + Picture *pic= h->short_ref[i]; + 
av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); + } + } +} + +/** + * print long term list + */ +static void print_long_term(H264Context *h) { + uint32_t i; + if(h->s.avctx->debug&FF_DEBUG_MMCO) { + av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n"); + for(i = 0; i < 16; i++){ + Picture *pic= h->long_ref[i]; + if (pic) { + av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); + } + } + } +} + +/** + * Executes the reference picture marking (memory management control operations). + */ +static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ + MpegEncContext * const s = &h->s; + int i, j; + int current_is_long=0; + Picture *pic; + + if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0) + av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n"); + + for(i=0; iavctx->debug&FF_DEBUG_MMCO) + av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index); + + switch(mmco[i].opcode){ + case MMCO_SHORT2UNUSED: + pic= remove_short(h, mmco[i].short_frame_num); + if(pic) + unreference_pic(h, pic); + else if(s->avctx->debug&FF_DEBUG_MMCO) + av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n"); + break; + case MMCO_SHORT2LONG: + pic= remove_long(h, mmco[i].long_index); + if(pic) unreference_pic(h, pic); + + h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num); + h->long_ref[ mmco[i].long_index ]->long_ref=1; + h->long_ref_count++; + break; + case MMCO_LONG2UNUSED: + pic= remove_long(h, mmco[i].long_index); + if(pic) + unreference_pic(h, pic); + else if(s->avctx->debug&FF_DEBUG_MMCO) + av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n"); + break; + case MMCO_LONG: + pic= remove_long(h, mmco[i].long_index); + if(pic) unreference_pic(h, pic); + + h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr; + h->long_ref[ mmco[i].long_index ]->long_ref=1; + 
h->long_ref_count++; + + current_is_long=1; + break; + case MMCO_SET_MAX_LONG: + assert(mmco[i].long_index <= 16); + // just remove the long term which index is greater than new max + for(j = mmco[i].long_index; j<16; j++){ + pic = remove_long(h, j); + if (pic) unreference_pic(h, pic); + } + break; + case MMCO_RESET: + while(h->short_ref_count){ + pic= remove_short(h, h->short_ref[0]->frame_num); + unreference_pic(h, pic); + } + for(j = 0; j < 16; j++) { + pic= remove_long(h, j); + if(pic) unreference_pic(h, pic); + } + break; + default: assert(0); + } + } + + if(!current_is_long){ + pic= remove_short(h, s->current_picture_ptr->frame_num); + if(pic){ + unreference_pic(h, pic); + av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n"); + } + + if(h->short_ref_count) + memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*)); + + h->short_ref[0]= s->current_picture_ptr; + h->short_ref[0]->long_ref=0; + h->short_ref_count++; + } + + print_short_term(h); + print_long_term(h); + return 0; +} + +static int decode_ref_pic_marking(H264Context *h){ + MpegEncContext * const s = &h->s; + int i; + + if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields + s->broken_link= get_bits1(&s->gb) -1; + h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx + if(h->mmco[0].long_index == -1) + h->mmco_index= 0; + else{ + h->mmco[0].opcode= MMCO_LONG; + h->mmco_index= 1; + } + }else{ + if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag + for(i= 0; igb);; + + h->mmco[i].opcode= opcode; + if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){ + h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<sps.log2_max_frame_num)-1); //FIXME fields +/* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){ + av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco); + return -1; + }*/ + } + 
if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){ + h->mmco[i].long_index= get_ue_golomb(&s->gb); + if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h->mmco[i].long_index >= 16){ + av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode); + return -1; + } + } + + if(opcode > MMCO_LONG){ + av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode); + return -1; + } + if(opcode == MMCO_END) + break; + } + h->mmco_index= i; + }else{ + assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count); + + if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields + h->mmco[0].opcode= MMCO_SHORT2UNUSED; + h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num; + h->mmco_index= 1; + }else + h->mmco_index= 0; + } + } + + return 0; +} + +static int init_poc(H264Context *h){ + MpegEncContext * const s = &h->s; + const int max_frame_num= 1<sps.log2_max_frame_num; + int field_poc[2]; + + if(h->nal_unit_type == NAL_IDR_SLICE){ + h->frame_num_offset= 0; + }else{ + if(h->frame_num < h->prev_frame_num) + h->frame_num_offset= h->prev_frame_num_offset + max_frame_num; + else + h->frame_num_offset= h->prev_frame_num_offset; + } + + if(h->sps.poc_type==0){ + const int max_poc_lsb= 1<sps.log2_max_poc_lsb; + + if(h->nal_unit_type == NAL_IDR_SLICE){ + h->prev_poc_msb= + h->prev_poc_lsb= 0; + } + + if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2) + h->poc_msb = h->prev_poc_msb + max_poc_lsb; + else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2) + h->poc_msb = h->prev_poc_msb - max_poc_lsb; + else + h->poc_msb = h->prev_poc_msb; +//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb); + field_poc[0] = + field_poc[1] = h->poc_msb + h->poc_lsb; + if(s->picture_structure == PICT_FRAME) + 
field_poc[1] += h->delta_poc_bottom; + }else if(h->sps.poc_type==1){ + int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc; + int i; + + if(h->sps.poc_cycle_length != 0) + abs_frame_num = h->frame_num_offset + h->frame_num; + else + abs_frame_num = 0; + + if(h->nal_ref_idc==0 && abs_frame_num > 0) + abs_frame_num--; + + expected_delta_per_poc_cycle = 0; + for(i=0; i < h->sps.poc_cycle_length; i++) + expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse + + if(abs_frame_num > 0){ + int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length; + int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length; + + expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle; + for(i = 0; i <= frame_num_in_poc_cycle; i++) + expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ]; + } else + expectedpoc = 0; + + if(h->nal_ref_idc == 0) + expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic; + + field_poc[0] = expectedpoc + h->delta_poc[0]; + field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field; + + if(s->picture_structure == PICT_FRAME) + field_poc[1] += h->delta_poc[1]; + }else{ + int poc; + if(h->nal_unit_type == NAL_IDR_SLICE){ + poc= 0; + }else{ + if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num); + else poc= 2*(h->frame_num_offset + h->frame_num) - 1; + } + field_poc[0]= poc; + field_poc[1]= poc; + } + + if(s->picture_structure != PICT_BOTTOM_FIELD) + s->current_picture_ptr->field_poc[0]= field_poc[0]; + if(s->picture_structure != PICT_TOP_FIELD) + s->current_picture_ptr->field_poc[1]= field_poc[1]; + if(s->picture_structure == PICT_FRAME) // FIXME field pix? + s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]); + + return 0; +} + +/** + * decodes a slice header. 
+ * this will allso call MPV_common_init() and frame_start() as needed + */ +static int decode_slice_header(H264Context *h){ + MpegEncContext * const s = &h->s; + int first_mb_in_slice, pps_id; + int num_ref_idx_active_override_flag; + static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; + int slice_type; + int default_ref_list_done = 0; + + s->current_picture.reference= h->nal_ref_idc != 0; + s->dropable= h->nal_ref_idc == 0; + + first_mb_in_slice= get_ue_golomb(&s->gb); + + slice_type= get_ue_golomb(&s->gb); + if(slice_type > 9){ + av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); + return -1; + } + if(slice_type > 4){ + slice_type -= 5; + h->slice_type_fixed=1; + }else + h->slice_type_fixed=0; + + slice_type= slice_type_map[ slice_type ]; + if (slice_type == I_TYPE + || (h->slice_num != 0 && slice_type == h->slice_type) ) { + default_ref_list_done = 1; + } + h->slice_type= slice_type; + + s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though + + pps_id= get_ue_golomb(&s->gb); + if(pps_id>255){ + av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n"); + return -1; + } + h->pps= h->pps_buffer[pps_id]; + if(h->pps.slice_group_count == 0){ + av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n"); + return -1; + } + + h->sps= h->sps_buffer[ h->pps.sps_id ]; + if(h->sps.log2_max_frame_num == 0){ + av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n"); + return -1; + } + + if(h->dequant_coeff_pps != pps_id){ + h->dequant_coeff_pps = pps_id; + init_dequant_tables(h); + } + + s->mb_width= h->sps.mb_width; + s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); + + h->b_stride= s->mb_width*4; + h->b8_stride= s->mb_width*2; + + s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right ); + if(h->sps.frame_mbs_only_flag) + s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom); + else + s->height= 
16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck + + if (s->context_initialized + && ( s->width != s->avctx->width || s->height != s->avctx->height)) { + free_tables(h); + MPV_common_end(s); + } + if (!s->context_initialized) { + if (MPV_common_init(s) < 0) + return -1; + + if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly + memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t)); + memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t)); + }else{ + int i; + for(i=0; i<16; i++){ +#define T(x) (x>>2) | ((x<<2) & 0xF) + h->zigzag_scan[i] = T(zigzag_scan[i]); + h-> field_scan[i] = T( field_scan[i]); +#undef T + } + } + if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ + memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t)); + memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); + }else{ + int i; + for(i=0; i<64; i++){ +#define T(x) (x>>3) | ((x&7)<<3) + h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]); + h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); +#undef T + } + } + if(h->sps.transform_bypass){ //FIXME same ugly + h->zigzag_scan_q0 = zigzag_scan; + h->field_scan_q0 = field_scan; + h->zigzag_scan8x8_q0 = zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; + }else{ + h->zigzag_scan_q0 = h->zigzag_scan; + h->field_scan_q0 = h->field_scan; + h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; + } + + alloc_tables(h); + + s->avctx->width = s->width; + s->avctx->height = s->height; + s->avctx->sample_aspect_ratio= h->sps.sar; + if(!s->avctx->sample_aspect_ratio.den) + s->avctx->sample_aspect_ratio.den = 1; + + if(h->sps.timing_info_present_flag){ + s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale}; + if(h->x264_build > 0 && h->x264_build < 44) + s->avctx->time_base.den *= 2; + av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den, + s->avctx->time_base.num, s->avctx->time_base.den, 1<<30); + 
} + } + + if(h->slice_num == 0){ + if(frame_start(h) < 0) + return -1; + } + + s->current_picture_ptr->frame_num= //FIXME frame_num cleanup + h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); + + h->mb_aff_frame = 0; + if(h->sps.frame_mbs_only_flag){ + s->picture_structure= PICT_FRAME; + }else{ + if(get_bits1(&s->gb)) { //field_pic_flag + s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag + } else { + s->picture_structure= PICT_FRAME; + first_mb_in_slice <<= h->sps.mb_aff; + h->mb_aff_frame = h->sps.mb_aff; + } + } + + s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; + s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width; + if(s->mb_y >= s->mb_height){ + return -1; + } + + if(s->picture_structure==PICT_FRAME){ + h->curr_pic_num= h->frame_num; + h->max_pic_num= 1<< h->sps.log2_max_frame_num; + }else{ + h->curr_pic_num= 2*h->frame_num; + h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1); + } + + if(h->nal_unit_type == NAL_IDR_SLICE){ + get_ue_golomb(&s->gb); /* idr_pic_id */ + } + + if(h->sps.poc_type==0){ + h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb); + + if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){ + h->delta_poc_bottom= get_se_golomb(&s->gb); + } + } + + if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){ + h->delta_poc[0]= get_se_golomb(&s->gb); + + if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME) + h->delta_poc[1]= get_se_golomb(&s->gb); + } + + init_poc(h); + + if(h->pps.redundant_pic_cnt_present){ + h->redundant_pic_count= get_ue_golomb(&s->gb); + } + + //set defaults, might be overriden a few line later + h->ref_count[0]= h->pps.ref_count[0]; + h->ref_count[1]= h->pps.ref_count[1]; + + if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){ + if(h->slice_type == B_TYPE){ + h->direct_spatial_mv_pred= get_bits1(&s->gb); + } + num_ref_idx_active_override_flag= get_bits1(&s->gb); + + 
if(num_ref_idx_active_override_flag){ + h->ref_count[0]= get_ue_golomb(&s->gb) + 1; + if(h->slice_type==B_TYPE) + h->ref_count[1]= get_ue_golomb(&s->gb) + 1; + + if(h->ref_count[0] > 32 || h->ref_count[1] > 32){ + av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n"); + return -1; + } + } + } + + if(!default_ref_list_done){ + fill_default_ref_list(h); + } + + if(decode_ref_pic_list_reordering(h) < 0) + return -1; + + if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) + || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) ) + pred_weight_table(h); + else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE) + implicit_weight_table(h); + else + h->use_weight = 0; + + if(s->current_picture.reference) + decode_ref_pic_marking(h); + + if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ) + h->cabac_init_idc = get_ue_golomb(&s->gb); + + h->last_qscale_diff = 0; + s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); + if(s->qscale<0 || s->qscale>51){ + av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale); + return -1; + } + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); + //FIXME qscale / qp ... 
stuff + if(h->slice_type == SP_TYPE){ + get_bits1(&s->gb); /* sp_for_switch_flag */ + } + if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){ + get_se_golomb(&s->gb); /* slice_qs_delta */ + } + + h->deblocking_filter = 1; + h->slice_alpha_c0_offset = 0; + h->slice_beta_offset = 0; + if( h->pps.deblocking_filter_parameters_present ) { + h->deblocking_filter= get_ue_golomb(&s->gb); + if(h->deblocking_filter < 2) + h->deblocking_filter^= 1; // 1<->0 + + if( h->deblocking_filter ) { + h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1; + h->slice_beta_offset = get_se_golomb(&s->gb) << 1; + } + } + if( s->avctx->skip_loop_filter >= AVDISCARD_ALL + ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE) + ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE) + ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) + h->deblocking_filter= 0; + +#if 0 //FMO + if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5) + slice_group_change_cycle= get_bits(&s->gb, ?); +#endif + + h->slice_num++; + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n", + h->slice_num, + (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), + first_mb_in_slice, + av_get_pict_type_char(h->slice_type), + pps_id, h->frame_num, + s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], + h->ref_count[0], h->ref_count[1], + s->qscale, + h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2, + h->use_weight, + h->use_weight==1 && h->use_weight_chroma ? 
"c" : "" + ); + } + + return 0; +} + +/** + * + */ +static inline int get_level_prefix(GetBitContext *gb){ + unsigned int buf; + int log; + + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); + buf=GET_CACHE(re, gb); + + log= 32 - av_log2(buf); +#ifdef TRACE + print_bin(buf>>(32-log), log); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); +#endif + + LAST_SKIP_BITS(re, gb, log); + CLOSE_READER(re, gb); + + return log-1; +} + +static inline int get_dct8x8_allowed(H264Context *h){ + int i; + for(i=0; i<4; i++){ + if(!IS_SUB_8X8(h->sub_mb_type[i]) + || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i]))) + return 0; + } + return 1; +} + +/** + * decodes a residual block. + * @param n block index + * @param scantable scantable + * @param max_coeff number of coefficients in the block + * @return <0 if an error occured + */ +static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){ + MpegEncContext * const s = &h->s; + static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; + int level[16]; + int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before; + + //FIXME put trailing_onex into the context + + if(n == CHROMA_DC_BLOCK_INDEX){ + coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); + total_coeff= coeff_token>>2; + }else{ + if(n == LUMA_DC_BLOCK_INDEX){ + total_coeff= pred_non_zero_count(h, 0); + coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); + total_coeff= coeff_token>>2; + }else{ + total_coeff= pred_non_zero_count(h, n); + coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2); + total_coeff= coeff_token>>2; + h->non_zero_count_cache[ scan8[n] ]= 
total_coeff; + } + } + + //FIXME set last_non_zero? + + if(total_coeff==0) + return 0; + + trailing_ones= coeff_token&3; + tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff); + assert(total_coeff<=16); + + for(i=0; i 10 && trailing_ones < 3; + int prefix= get_level_prefix(gb); + + //first coefficient has suffix_length equal to 0 or 1 + if(prefix<14){ //FIXME try to build a large unified VLC table for all this + if(suffix_length) + level_code= (prefix<s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + if(trailing_ones < 3) level_code += 2; + + suffix_length = 1; + if(level_code > 5) + suffix_length++; + mask= -(level_code&1); + level[i]= (((2+level_code)>>1) ^ mask) - mask; + i++; + + //remaining coefficients have suffix_length > 0 + for(;is.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + mask= -(level_code&1); + level[i]= (((2+level_code)>>1) ^ mask) - mask; + if(level_code > suffix_limit[suffix_length]) + suffix_length++; + } + } + + if(total_coeff == max_coeff) + zeros_left=0; + else{ + if(n == CHROMA_DC_BLOCK_INDEX) + zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); + else + zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1); + } + + coeff_num = zeros_left + total_coeff - 1; + j = scantable[coeff_num]; + if(n > 24){ + block[j] = level[0]; + for(i=1;i>6; + for(i=1;i>6; + } + } + + if(zeros_left<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + return 0; +} + +/** + * decodes a P_SKIP or B_SKIP macroblock + */ +static void decode_mb_skip(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int mb_type=0; + + memset(h->non_zero_count[mb_xy], 0, 16); + memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui + + if(h->mb_aff_frame && 
s->mb_skip_run==0 && (s->mb_y&1)==0){ + h->mb_field_decoding_flag= get_bits1(&s->gb); + } + if(h->mb_field_decoding_flag) + mb_type|= MB_TYPE_INTERLACED; + + if( h->slice_type == B_TYPE ) + { + // just for fill_caches. pred_direct_motion will set the real mb_type + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... + pred_direct_motion(h, &mb_type); + if(h->pps.cabac){ + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); + } + } + else + { + int mx, my; + mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... + pred_pskip_motion(h, &mx, &my); + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + if(h->pps.cabac) + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + } + + write_back_motion(h, mb_type); + s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP; + s->current_picture.qscale_table[mb_xy]= s->qscale; + h->slice_table[ mb_xy ]= h->slice_num; + h->prev_mb_skipped= 1; +} + +/** + * decodes a macroblock + * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed + */ +static int decode_mb_cavlc(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int mb_type, partition_count, cbp; + int dct8x8_allowed= h->pps.transform_8x8_mode; + + s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong? + + tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); + cbp = 0; /* avoid warning. 
FIXME: find a solution without slowing + down the code */ + if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){ + if(s->mb_skip_run==-1) + s->mb_skip_run= get_ue_golomb(&s->gb); + + if (s->mb_skip_run--) { + decode_mb_skip(h); + return 0; + } + } + if(h->mb_aff_frame){ + if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped) + h->mb_field_decoding_flag = get_bits1(&s->gb); + }else + h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); + + h->prev_mb_skipped= 0; + + mb_type= get_ue_golomb(&s->gb); + if(h->slice_type == B_TYPE){ + if(mb_type < 23){ + partition_count= b_mb_type_info[mb_type].partition_count; + mb_type= b_mb_type_info[mb_type].type; + }else{ + mb_type -= 23; + goto decode_intra_mb; + } + }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){ + if(mb_type < 5){ + partition_count= p_mb_type_info[mb_type].partition_count; + mb_type= p_mb_type_info[mb_type].type; + }else{ + mb_type -= 5; + goto decode_intra_mb; + } + }else{ + assert(h->slice_type == I_TYPE); +decode_intra_mb: + if(mb_type > 25){ + av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); + return -1; + } + partition_count=0; + cbp= i_mb_type_info[mb_type].cbp; + h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; + mb_type= i_mb_type_info[mb_type].type; + } + + if(h->mb_field_decoding_flag) + mb_type |= MB_TYPE_INTERLACED; + + h->slice_table[ mb_xy ]= h->slice_num; + + if(IS_INTRA_PCM(mb_type)){ + unsigned int x, y; + + // we assume these blocks are very rare so we dont optimize it + align_get_bits(&s->gb); + + // The pixels are stored in the same order as levels in h->mb array. 
+ for(y=0; y<16; y++){ + const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3); + for(x=0; x<16; x++){ + tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8); + } + } + for(y=0; y<8; y++){ + const int index= 256 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8); + } + } + for(y=0; y<8; y++){ + const int index= 256 + 64 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8); + } + } + + // In deblocking, the quantizer is 0 + s->current_picture.qscale_table[mb_xy]= 0; + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0); + // All coeffs are present + memset(h->non_zero_count[mb_xy], 16, 16); + + s->current_picture.mb_type[mb_xy]= mb_type; + return 0; + } + + fill_caches(h, mb_type, 0); + + //mb_pred + if(IS_INTRA(mb_type)){ +// init_top_left_availability(h); + if(IS_INTRA4x4(mb_type)){ + int i; + int di = 1; + if(dct8x8_allowed && get_bits1(&s->gb)){ + mb_type |= MB_TYPE_8x8DCT; + di = 4; + } + +// fill_intra4x4_pred_table(h); + for(i=0; i<16; i+=di){ + const int mode_coded= !get_bits1(&s->gb); + const int predicted_mode= pred_intra_mode(h, i); + int mode; + + if(mode_coded){ + const int rem_mode= get_bits(&s->gb, 3); + if(rem_modeintra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); + else + h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; + } + write_back_intra_pred_mode(h); + if( check_intra4x4_pred_mode(h) < 0) + return -1; + }else{ + h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode); + if(h->intra16x16_pred_mode < 0) + return -1; + } + h->chroma_pred_mode= get_ue_golomb(&s->gb); + + h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode); + if(h->chroma_pred_mode < 0) + return -1; + }else if(partition_count==4){ + 
int i, j, sub_partition_count[4], list, ref[2][4]; + + if(h->slice_type == B_TYPE){ + for(i=0; i<4; i++){ + h->sub_mb_type[i]= get_ue_golomb(&s->gb); + if(h->sub_mb_type[i] >=13){ + av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); + return -1; + } + sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } + if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) + || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { + pred_direct_motion(h, &mb_type); + h->ref_cache[0][scan8[4]] = + h->ref_cache[1][scan8[4]] = + h->ref_cache[0][scan8[12]] = + h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE; + } + }else{ + assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ? + for(i=0; i<4; i++){ + h->sub_mb_type[i]= get_ue_golomb(&s->gb); + if(h->sub_mb_type[i] >=4){ + av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y); + return -1; + } + sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } + } + + for(list=0; list<2; list++){ + int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; + if(ref_count == 0) continue; + if (h->mb_aff_frame && h->mb_field_decoding_flag) { + ref_count <<= 1; + } + for(i=0; i<4; i++){ + if(IS_DIRECT(h->sub_mb_type[i])) continue; + if(IS_DIR(h->sub_mb_type[i], 0, list)){ + ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip? + }else{ + //FIXME + ref[list][i] = -1; + } + } + } + + if(dct8x8_allowed) + dct8x8_allowed = get_dct8x8_allowed(h); + + for(list=0; list<2; list++){ + const int ref_count= IS_REF0(mb_type) ? 
1 : h->ref_count[list]; + if(ref_count == 0) continue; + + for(i=0; i<4; i++){ + if(IS_DIRECT(h->sub_mb_type[i])) { + h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ]; + continue; + } + h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]= + h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; + + if(IS_DIR(h->sub_mb_type[i], 0, list)){ + const int sub_mb_type= h->sub_mb_type[i]; + const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; + for(j=0; jmv_cache[list][ scan8[index] ]; + pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my); + mx += get_se_golomb(&s->gb); + my += get_se_golomb(&s->gb); + tprintf("final mv:%d %d\n", mx, my); + + if(IS_SUB_8X8(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= + mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= + mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; + }else if(IS_SUB_8X4(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my; + }else if(IS_SUB_4X8(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my; + }else{ + assert(IS_SUB_4X4(sub_mb_type)); + mv_cache[ 0 ][0]= mx; + mv_cache[ 0 ][1]= my; + } + } + }else{ + uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; + p[0] = p[1]= + p[8] = p[9]= 0; + } + } + } + }else if(IS_DIRECT(mb_type)){ + pred_direct_motion(h, &mb_type); + dct8x8_allowed &= h->sps.direct_8x8_inference_flag; + }else{ + int list, mx, my, i; + //FIXME we should set ref_idx_l? to 0 if we use that later ... 
+ if(IS_16X16(mb_type)){ + for(list=0; list<2; list++){ + if(h->ref_count[list]>0){ + if(IS_DIR(mb_type, 0, list)){ + const int val= get_te0_golomb(&s->gb, h->ref_count[list]); + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1); + } + } + for(list=0; list<2; list++){ + if(IS_DIR(mb_type, 0, list)){ + pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my); + mx += get_se_golomb(&s->gb); + my += get_se_golomb(&s->gb); + tprintf("final mv:%d %d\n", mx, my); + + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); + }else + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); + } + } + else if(IS_16X8(mb_type)){ + for(list=0; list<2; list++){ + if(h->ref_count[list]>0){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + const int val= get_te0_golomb(&s->gb, h->ref_count[list]); + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); + } + } + } + for(list=0; list<2; list++){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my); + mx += get_se_golomb(&s->gb); + my += get_se_golomb(&s->gb); + tprintf("final mv:%d %d\n", mx, my); + + fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); + }else + fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); + } + } + }else{ + assert(IS_8X16(mb_type)); + for(list=0; list<2; list++){ + if(h->ref_count[list]>0){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ //FIXME optimize + const int val= get_te0_golomb(&s->gb, h->ref_count[list]); + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); + } + } + } + 
for(list=0; list<2; list++){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my); + mx += get_se_golomb(&s->gb); + my += get_se_golomb(&s->gb); + tprintf("final mv:%d %d\n", mx, my); + + fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); + }else + fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); + } + } + } + } + + if(IS_INTER(mb_type)) + write_back_motion(h, mb_type); + + if(!IS_INTRA16x16(mb_type)){ + cbp= get_ue_golomb(&s->gb); + if(cbp > 47){ + av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y); + return -1; + } + + if(IS_INTRA4x4(mb_type)) + cbp= golomb_to_intra4x4_cbp[cbp]; + else + cbp= golomb_to_inter_cbp[cbp]; + } + + if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ + if(get_bits1(&s->gb)) + mb_type |= MB_TYPE_8x8DCT; + } + s->current_picture.mb_type[mb_xy]= mb_type; + + if(cbp || IS_INTRA16x16(mb_type)){ + int i8x8, i4x4, chroma_idx; + int chroma_qp, dquant; + GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; + const uint8_t *scan, *scan8x8, *dc_scan; + +// fill_non_zero_count_cache(h); + + if(IS_INTERLACED(mb_type)){ + scan= s->qscale ? h->field_scan : h->field_scan_q0; + dc_scan= luma_dc_field_scan; + }else{ + scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; + dc_scan= luma_dc_zigzag_scan; + } + scan8x8= s->qscale ? 
h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; + + dquant= get_se_golomb(&s->gb); + + if( dquant > 25 || dquant < -26 ){ + av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y); + return -1; + } + + s->qscale += dquant; + if(((unsigned)s->qscale) > 51){ + if(s->qscale<0) s->qscale+= 52; + else s->qscale-= 52; + } + + h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); + if(IS_INTRA16x16(mb_type)){ + if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ + return -1; //FIXME continue if partitioned and other return -1 too + } + + assert((cbp&15) == 0 || (cbp&15) == 15); + + if(cbp&15){ + for(i8x8=0; i8x8<4; i8x8++){ + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8; + if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){ + return -1; + } + } + } + }else{ + fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); + } + }else{ + for(i8x8=0; i8x8<4; i8x8++){ + if(cbp & (1<mb[64*i8x8]; + uint8_t *nnz; + for(i4x4=0; i4x4<4; i4x4++){ + if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, + h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) + return -1; + } + nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; + nnz[0] += nnz[1] + nnz[8] + nnz[9]; + }else{ + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8; + + if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 
0:3][s->qscale], 16) <0 ){
+                                return -1;
+                            }
+                        }
+                    }
+                }else{
+                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
+                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
+                }
+            }
+        }
+
+        if(cbp&0x30){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
+                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
+                    return -1;
+                }
+        }
+
+        if(cbp&0x20){
+            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+                for(i4x4=0; i4x4<4; i4x4++){
+                    const int index= 16 + 4*chroma_idx + i4x4;
+                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
+                        return -1;
+                    }
+                }
+            }
+        }else{
+            uint8_t * const nnz= &h->non_zero_count_cache[0];
+            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+        }
+    }else{
+        uint8_t * const nnz= &h->non_zero_count_cache[0];
+        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
+        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
+        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
+    }
+    s->current_picture.qscale_table[mb_xy]= s->qscale;
+    write_back_non_zero_count(h);
+
+    return 0;
+}
+/**
+ * decodes the field decoding flag of a macroblock with CABAC.
+ * The context index is the number of neighbours (mba_xy: one position to the
+ * left, mbb_xy: two rows up, both relative to the even row of the current
+ * macroblock) that belong to the current slice and are interlaced.
+ * @returns the bin decoded by get_cabac() from cabac_state[70 + ctx]
+ */
+static int decode_cabac_field_decoding_flag(H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    const int mb_x = s->mb_x;
+    const int mb_y = s->mb_y & ~1; /* clear bit 0: always use the even row */
+    const int mba_xy = mb_x - 1 + mb_y    *s->mb_stride;
+    const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
+
+    unsigned int ctx = 0;
+
+    if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
+        ctx += 1;
+    }
+    if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
+        ctx += 1;
+    }
+
+    return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
+}
+/**
+ * decodes an intra macroblock type with CABAC.
+ * @param ctx_base index of the first cabac_state entry used for the bins
+ * @param intra_slice non zero when called for an I slice; the first bin then
+ *        uses a context derived from the left/top neighbours and the
+ *        remaining bins use states shifted by 2 (and by intra_slice)
+ * @returns 0 for I4x4, 25 for PCM, otherwise an I16x16 type index >= 1
+ */
+static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
+    uint8_t *state= &h->cabac_state[ctx_base];
+    int mb_type;
+
+    if(intra_slice){
+        MpegEncContext * const s = &h->s;
+        const int mba_xy = h->left_mb_xy[0];
+        const int mbb_xy = h->top_mb_xy;
+        int ctx=0;
+        if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
+            ctx++;
+        if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
+            ctx++;
+        if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
+            return 0;   /* I4x4 */
+        state += 2;
+    }else{
+        if( get_cabac( &h->cabac, &state[0] ) == 0 )
+            return 0;   /* I4x4 */
+    }
+
+    if( get_cabac_terminate( &h->cabac ) )
+        return 25;  /* PCM */
+
+    mb_type = 1;    /* I16x16 */
+    mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
+    if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
+        mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
+    mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
+    mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
+    return mb_type;
+}
+/**
+ * decodes the macroblock type with CABAC according to h->slice_type.
+ * I slices return the intra type directly, P slices return 0..3 for inter
+ * types or the intra type + 5, B slices return 0..22 for inter types or the
+ * intra type + 23.
+ * @returns the macroblock type index, or -1 for any other slice type
+ */
+static int decode_cabac_mb_type( H264Context *h ) {
+    MpegEncContext * const s = &h->s;
+
+    if( h->slice_type == I_TYPE ) {
+        return decode_cabac_intra_mb_type(h, 3, 1);
+    } else if( h->slice_type == P_TYPE ) {
+        if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
+            /* P-type */
+            if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
+                /* P_L0_D16x16, P_8x8 */
+                return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
+            } else {
+                /* P_L0_D8x16, P_L0_D16x8 */
+                return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
+            }
+        } else {
+            return decode_cabac_intra_mb_type(h, 17, 0) + 5;
+        }
+    } else if( h->slice_type == B_TYPE ) {
+        const int mba_xy = h->left_mb_xy[0];
+        const int mbb_xy = h->top_mb_xy;
+        int ctx = 0;
+        int bits;
+
+        if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
+            ctx++;
+        if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
+            ctx++;
+
+        if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
+            return 0; /* B_Direct_16x16 */
+
+        if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
+            return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
+        }
+
+        bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; /* 4 bins, most significant first */
+        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
+        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
+        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
+        if( bits < 8 )
+            return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
+        else if( bits == 13 ) {
+            return decode_cabac_intra_mb_type(h, 32, 0) + 23;
+        } else if( bits == 14 )
+            return 11; /* B_L1_L0_8x16 */
+        else if( bits == 15 )
+            return 22; /* B_8x8 */
+
+        bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* remaining codes (8..12) carry a 5th bin */
+        return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
+    } else {
+        /* TODO SI/SP frames? */
+        return -1;
+    }
+}
+/**
+ * decodes the skip flag of the current macroblock with CABAC.
+ * The context counts the left and top neighbours that belong to the current
+ * slice and are not skipped; B slices use the states 13 entries further.
+ * @returns the bin decoded by get_cabac() from cabac_state[11 + ctx]
+ */
+static int decode_cabac_mb_skip( H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+    const int mba_xy = mb_xy - 1;
+    const int mbb_xy = mb_xy - s->mb_stride;
+    int ctx = 0;
+
+    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
+        ctx++;
+    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
+        ctx++;
+
+    if( h->slice_type == B_TYPE )
+        ctx += 13;
+    return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
+}
+/**
+ * decodes an intra 4x4 prediction mode with CABAC.
+ * @param pred_mode the predicted mode
+ * @returns pred_mode if the first bin is set; otherwise a 3 bit value
+ *          (least significant bin first), incremented by 1 when it is
+ *          >= pred_mode so that pred_mode itself is never returned here
+ */
+static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
+    int mode = 0;
+
+    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
+        return pred_mode;
+
+    mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
+
+    if( mode >= pred_mode )
+        return mode + 1;
+    else
+        return mode;
+}
+/**
+ * decodes the chroma prediction mode with CABAC.
+ * The context of the first bin counts the left and top neighbours that belong
+ * to the current slice and have a non zero chroma_pred_mode_table entry.
+ * @returns the chroma prediction mode, 0..3
+ */
+static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
+    const int mba_xy = h->left_mb_xy[0];
+    const int mbb_xy = h->top_mb_xy;
+
+    int ctx = 0;
+
+    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
+    if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
+        ctx++;
+
+    if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
+        ctx++;
+
+    if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
+        return 0;
+
+    if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+        return 1;
+    if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+        return 2;
+    else
+        return 3;
+}
+/*
+ * block_idx_x[i] / block_idx_y[i] give the x / y position (0..3) of block i,
+ * block_idx_xy[x][y] is the inverse mapping back to the block index.
+ * NOTE(review): presumably i is the 4x4 luma block index inside the
+ * macroblock -- confirm against the other users of these tables.
+ */
+static const uint8_t block_idx_x[16] = {
+    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
+};
+static const uint8_t block_idx_y[16] = {
+    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
+};
+static const uint8_t block_idx_xy[4][4] = {
+    { 0, 2, 8,  10},
+    { 1, 3, 9,  11},
+    { 4, 6, 12, 14},
+    { 5, 7, 13, 15}
+};
+/**
+ * decodes the luma part of the coded block pattern with CABAC.
+ * One bin is read per 8x8 block; its context depends on whether the matching
+ * cbp bit of the block to the left (cbp_a) and above (cbp_b) is zero. Inside
+ * the macroblock the bits decoded so far are used, at the left/top edge
+ * h->left_cbp / h->top_cbp are used when that macroblock is in the current
+ * slice, and -1 marks a neighbour that is not used for the context.
+ * @returns the 4 bit luma cbp, bit i8x8 set when the corresponding bin was 1
+ */
+static int decode_cabac_mb_cbp_luma( H264Context *h) {
+    int cbp = 0;
+    int cbp_b = -1;
+    int i8x8;
+
+    if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
+        cbp_b = h->top_cbp;
+        tprintf("cbp_b = top_cbp = %x\n", cbp_b);
+    }
+
+    for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
+        int cbp_a = -1;
+        int x, y;
+        int ctx = 0;
+
+        x = block_idx_x[4*i8x8];
+        y = block_idx_y[4*i8x8];
+
+        if( x > 0 )
+            cbp_a = cbp;
+        else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
+            cbp_a = h->left_cbp;
+            tprintf("cbp_a = left_cbp = %x\n", cbp_a);
+        }
+
+        if( y > 0 )
+            cbp_b = cbp;
+
+        /* No need to test for skip as we put 0 for skip block */
+        /* No need to test for IPCM as we put 1 for IPCM block */
+        if( cbp_a >= 0 ) {
+            int i8x8a = block_idx_xy[(x-1)&0x03][y]/4; /* &0x03 wraps x-1 == -1 to column 3 */
+            if( ((cbp_a >> i8x8a)&0x01) == 0 )
+                ctx++;
+        }
+
+        if( cbp_b >= 0 ) {
+            int i8x8b = block_idx_xy[x][(y-1)&0x03]/4; /* &0x03 wraps y-1 == -1 to row 3 */
+            if( ((cbp_b >> i8x8b)&0x01) == 0 )
+                ctx += 2;
+        }
+
+        if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
+            cbp |= 1 << i8x8;
+        }
+    }
+    return cbp;
+}
+/**
+ * decodes the chroma part of the coded block pattern with CABAC.
+ * The contexts are derived from bits 4..5 of h->left_cbp and h->top_cbp.
+ * @returns 0, 1 or 2
+ */
+static int decode_cabac_mb_cbp_chroma( H264Context *h) {
+    int ctx;
+    int cbp_a, cbp_b;
+
+    cbp_a = (h->left_cbp>>4)&0x03;
+    cbp_b = (h-> top_cbp>>4)&0x03;
+
+    ctx = 0;
+    if( cbp_a > 0 ) ctx++;
+    if( cbp_b > 0 ) ctx += 2;
+    if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
+        return 0;
+
+    ctx = 4;
+    if( cbp_a == 2 ) ctx++;
+    if( cbp_b == 2 ) ctx += 2;
+    return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
+}
+/**
+ * decodes the quantizer delta of a macroblock with CABAC.
+ * A unary count val is read and mapped to a signed value: odd counts give
+ * (val + 1)/2, even counts give -(val + 1)/2.
+ * @returns the signed delta, or INT_MIN if more than 102 bins were set
+ */
+static int decode_cabac_mb_dqp( H264Context *h) {
+    MpegEncContext * const s = &h->s;
+    int mbn_xy;
+    int ctx = 0;
+    int val = 0;
+
+    /* previous macroblock: the left one, or the last one of the row above */
+    if( s->mb_x > 0 )
+        mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
+    else
+        mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
+
+    if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
+        ctx++;
+
+    while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
+        if( ctx < 2 )
+            ctx = 2;
+        else
+            ctx = 3;
+        val++;
+        if(val > 102) //prevent infinite loop
+            return INT_MIN;
+    }
+
+    if( val&0x01 )
+        return (val + 1)/2;
+    else
+        return -(val + 1)/2;
+}
+/**
+ * decodes the sub macroblock type of a P macroblock with CABAC.
+ * @returns 0 (8x8), 1 (8x4), 2 (4x8) or 3 (4x4)
+ */
+static int decode_cabac_p_mb_sub_type( H264Context *h ) {
+    if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
+        return 0;   /* 8x8 */
+    if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
+        return 1;   /* 8x4 */
+    if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
+        return 2;   /* 4x8 */
+    return 3;       /* 4x4 */
+}
+/**
+ * decodes the sub macroblock type of a B macroblock with CABAC.
+ * @returns a sub macroblock type index in the range 0..12
+ */
+static int decode_cabac_b_mb_sub_type( H264Context *h ) {
+    int type;
+    if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
+        return 0;   /* B_Direct_8x8 */
+    if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
+        return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
+    type = 3;
+    if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
+        if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
+            return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
+        type += 4;
+    }
+    type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
+    type +=   get_cabac( &h->cabac, &h->cabac_state[39] );
+    return type;
+}
+
+/**
+ * decodes the transform size flag with CABAC.
+ * @returns the bin read from cabac_state[399 + h->neighbor_transform_size]
+ */
+static inline int decode_cabac_mb_transform_size( H264Context *h ) {
+    return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
+}
+
+/**
+ * decodes a reference index with CABAC.
+ * The first context depends on the reference indices cached for the block to
+ * the left (scan8[n] - 1) and above (scan8[n] - 8); in B slices neighbours
+ * flagged in direct_cache do not count.
+ * @param list reference list (0 or 1)
+ * @param n block index passed to scan8[]
+ * @returns the reference index; on a damaged stream that would yield an
+ *          index of 32 or more an error is logged and 0 is returned so the
+ *          unary loop cannot run away
+ */
+static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
+    int refa = h->ref_cache[list][scan8[n] - 1];
+    int refb = h->ref_cache[list][scan8[n] - 8];
+    int ref  = 0;
+    int ctx  = 0;
+
+    if( h->slice_type == B_TYPE) {
+        if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
+            ctx++;
+        if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
+            ctx += 2;
+    } else {
+        if( refa > 0 )
+            ctx++;
+        if( refb > 0 )
+            ctx += 2;
+    }
+
+    while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
+        ref++;
+        if( ctx < 4 )
+            ctx = 4;
+        else
+            ctx = 5;
+        if( ref >= 32 ) { //prevent infinite loop on damaged streams
+            av_log( h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n" );
+            return 0; //FIXME callers do not check for an error value
+        }
+    }
+    return ref;
+}
+
+/**
+ * decodes one component of a motion vector difference with CABAC.
+ * The first context is selected from the sum of the absolute differences
+ * cached for the left and top neighbours; values of 9 and above continue
+ * with a bypass coded Exp-Golomb suffix (starting order 3).
+ * @param list reference list (0 or 1)
+ * @param n block index passed to scan8[]
+ * @param l component, 0 selects context base 40 and anything else 47
+ * @returns the signed difference; if the suffix prefix is longer than 24
+ *          bins an error is logged and 0 is returned, which keeps 1 << k
+ *          defined and avoids an integer overflow in the callers
+ */
+static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
+    int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
+               abs( h->mvd_cache[list][scan8[n] - 8][l] );
+    int ctxbase = (l == 0) ? 40 : 47;
+    int ctx, mvd;
+
+    if( amvd < 3 )
+        ctx = 0;
+    else if( amvd > 32 )
+        ctx = 2;
+    else
+        ctx = 1;
+
+    if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
+        return 0;
+
+    mvd= 1;
+    ctx= 3;
+    while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
+        mvd++;
+        if( ctx < 6 )
+            ctx++;
+    }
+
+    if( mvd >= 9 ) {
+        int k = 3;
+        while( get_cabac_bypass( &h->cabac ) ) {
+            mvd += 1 << k;
+            k++;
+            if( k > 24 ) { //prevent undefined shifts on damaged streams
+                av_log( h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n" );
+                return 0; //FIXME callers do not check for an error value
+            }
+        }
+        while( k-- ) {
+            if( get_cabac_bypass( &h->cabac ) )
+                mvd += 1 << k;
+        }
+    }
+    if( get_cabac_bypass( &h->cabac ) )  return -mvd;
+    else                                 return  mvd;
+}
+
+/**
+ * computes the context of the coded block flag for block category cat.
+ * nza / nzb describe the left / top neighbour: for cat 0 and 3 they are
+ * taken from bits of h->left_cbp / h->top_cbp, for the other categories from
+ * non_zero_count_cache.
+ * @returns ctx + 4 * cat, with bit 0 of ctx set for nza > 0 and bit 1 for nzb > 0
+ */
+static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
+    int nza, nzb;
+    int ctx = 0;
+
+    if( cat == 0 ) {
+        nza = h->left_cbp&0x100;
+        nzb = h-> top_cbp&0x100;
+    } else if( cat == 1 || cat == 2 ) {
+        nza = h->non_zero_count_cache[scan8[idx] - 1];
+        nzb = h->non_zero_count_cache[scan8[idx] - 8];
+    } else if( cat == 3 ) {
+        nza = (h->left_cbp>>(6+idx))&0x01;
+        nzb = (h-> top_cbp>>(6+idx))&0x01;
+    } else {
+        assert(cat == 4);
+        nza
= h->non_zero_count_cache[scan8[16+idx] - 1]; + nzb = h->non_zero_count_cache[scan8[16+idx] - 8]; + } + + if( nza > 0 ) + ctx++; + + if( nzb > 0 ) + ctx += 2; + + return ctx + 4 * cat; +} + +static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { + const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; + static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; + static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; + static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; + static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; + static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 }; + static const int significant_coeff_flag_offset_8x8[63] = { + 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, + 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, + 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 + }; + static const int last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 + }; + + int index[64]; + + int i, last; + int coeff_count = 0; + + int abslevel1 = 1; + int abslevelgt1 = 0; + + uint8_t *significant_coeff_ctx_base; + uint8_t *last_coeff_ctx_base; + uint8_t *abs_level_m1_ctx_base; + + /* cat: 0-> DC 16x16 n = 0 + * 1-> AC 16x16 n = luma4x4idx + * 2-> Luma4x4 n = luma4x4idx + * 3-> DC Chroma n = iCbCr + * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx + * 5-> Luma8x8 n = 4 * luma8x8idx + */ + + /* read coded block flag */ + if( cat != 5 ) { + if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { + if( cat == 1 || cat == 2 ) + h->non_zero_count_cache[scan8[n]] = 0; + else if( cat == 4 ) + 
h->non_zero_count_cache[scan8[16+n]] = 0; + + return 0; + } + } + + significant_coeff_ctx_base = h->cabac_state + + significant_coeff_flag_offset[cat] + + significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + last_coeff_ctx_base = h->cabac_state + + last_significant_coeff_flag_offset[cat] + + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + abs_level_m1_ctx_base = h->cabac_state + + coeff_abs_level_m1_offset[cat]; + + if( cat == 5 ) { +#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \ + for(last= 0; last < coefs; last++) { \ + uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \ + if( get_cabac( &h->cabac, sig_ctx )) { \ + uint8_t *last_ctx = last_coeff_ctx_base + last_off; \ + index[coeff_count++] = last; \ + if( get_cabac( &h->cabac, last_ctx ) ) { \ + last= max_coeff; \ + break; \ + } \ + } \ + } + DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last], + last_coeff_flag_offset_8x8[last] ); + } else { + DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); + } + if( last == max_coeff -1 ) { + index[coeff_count++] = last; + } + assert(coeff_count > 0); + + if( cat == 0 ) + h->cbp_table[mb_xy] |= 0x100; + else if( cat == 1 || cat == 2 ) + h->non_zero_count_cache[scan8[n]] = coeff_count; + else if( cat == 3 ) + h->cbp_table[mb_xy] |= 0x40 << n; + else if( cat == 4 ) + h->non_zero_count_cache[scan8[16+n]] = coeff_count; + else { + assert( cat == 5 ); + fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); + } + + for( i = coeff_count - 1; i >= 0; i-- ) { + uint8_t *ctx = (abslevelgt1 != 0 ? 
0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base; + int j= scantable[index[i]]; + + if( get_cabac( &h->cabac, ctx ) == 0 ) { + if( !qmul ) { + if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; + else block[j] = 1; + }else{ + if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6; + else block[j] = ( qmul[j] + 32) >> 6; + } + + abslevel1++; + } else { + int coeff_abs = 2; + ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base; + while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) { + coeff_abs++; + } + + if( coeff_abs >= 15 ) { + int j = 0; + while( get_cabac_bypass( &h->cabac ) ) { + coeff_abs += 1 << j; + j++; + } + + while( j-- ) { + if( get_cabac_bypass( &h->cabac ) ) + coeff_abs += 1 << j ; + } + } + + if( !qmul ) { + if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; + else block[j] = coeff_abs; + }else{ + if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6; + else block[j] = ( coeff_abs * qmul[j] + 32) >> 6; + } + + abslevelgt1++; + } + } + return 0; +} + +static void inline compute_mb_neighbors(H264Context *h) +{ + MpegEncContext * const s = &h->s; + const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; + h->top_mb_xy = mb_xy - s->mb_stride; + h->left_mb_xy[0] = mb_xy - 1; + if(h->mb_aff_frame){ + const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; + const int top_pair_xy = pair_xy - s->mb_stride; + const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); + const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); + const int curr_mb_frame_flag = !h->mb_field_decoding_flag; + const int bottom = (s->mb_y & 1); + if (bottom + ? 
!curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock + ) { + h->top_mb_xy -= s->mb_stride; + } + if (left_mb_frame_flag != curr_mb_frame_flag) { + h->left_mb_xy[0] = pair_xy - 1; + } + } + return; +} + +/** + * decodes a macroblock + * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed + */ +static int decode_mb_cabac(H264Context *h) { + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int mb_type, partition_count, cbp = 0; + int dct8x8_allowed= h->pps.transform_8x8_mode; + + s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?) + + tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); + if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) { + /* read skip flags */ + if( decode_cabac_mb_skip( h ) ) { + decode_mb_skip(h); + + h->cbp_table[mb_xy] = 0; + h->chroma_pred_mode_table[mb_xy] = 0; + h->last_qscale_diff = 0; + + return 0; + + } + } + if(h->mb_aff_frame){ + if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped) + h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); + }else + h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); + + h->prev_mb_skipped = 0; + + compute_mb_neighbors(h); + if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) { + av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" ); + return -1; + } + + if( h->slice_type == B_TYPE ) { + if( mb_type < 23 ){ + partition_count= b_mb_type_info[mb_type].partition_count; + mb_type= b_mb_type_info[mb_type].type; + }else{ + mb_type -= 23; + goto decode_intra_mb; + } + } else if( h->slice_type == P_TYPE ) { + if( mb_type < 5) { + partition_count= p_mb_type_info[mb_type].partition_count; + mb_type= p_mb_type_info[mb_type].type; + } else { + mb_type -= 5; + goto decode_intra_mb; + } + } else { + assert(h->slice_type == I_TYPE); +decode_intra_mb: + partition_count = 0; + cbp= i_mb_type_info[mb_type].cbp; + 
h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; + mb_type= i_mb_type_info[mb_type].type; + } + if(h->mb_field_decoding_flag) + mb_type |= MB_TYPE_INTERLACED; + + h->slice_table[ mb_xy ]= h->slice_num; + + if(IS_INTRA_PCM(mb_type)) { + const uint8_t *ptr; + unsigned int x, y; + + // We assume these blocks are very rare so we dont optimize it. + // FIXME The two following lines get the bitstream position in the cabac + // decode, I think it should be done by a function in cabac.h (or cabac.c). + ptr= h->cabac.bytestream; + if (h->cabac.low&0x1) ptr-=CABAC_BITS/8; + + // The pixels are stored in the same order as levels in h->mb array. + for(y=0; y<16; y++){ + const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3); + for(x=0; x<16; x++){ + tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++; + } + } + for(y=0; y<8; y++){ + const int index= 256 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*(x>>2)]= *ptr++; + } + } + for(y=0; y<8; y++){ + const int index= 256 + 64 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*(x>>2)]= *ptr++; + } + } + + ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr); + + // All blocks are present + h->cbp_table[mb_xy] = 0x1ef; + h->chroma_pred_mode_table[mb_xy] = 0; + // In deblocking, the quantizer is 0 + s->current_picture.qscale_table[mb_xy]= 0; + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0); + // All coeffs are present + memset(h->non_zero_count[mb_xy], 16, 16); + s->current_picture.mb_type[mb_xy]= mb_type; + return 0; + } + + fill_caches(h, mb_type, 0); + + if( IS_INTRA( mb_type ) ) { + int i; + if( IS_INTRA4x4( mb_type ) ) { + if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) { + mb_type |= MB_TYPE_8x8DCT; + for( i = 0; i < 16; i+=4 ) { + int pred = pred_intra_mode( h, i ); + int mode = 
decode_cabac_mb_intra4x4_pred_mode( h, pred ); + fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); + } + } else { + for( i = 0; i < 16; i++ ) { + int pred = pred_intra_mode( h, i ); + h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred ); + + //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] ); + } + } + write_back_intra_pred_mode(h); + if( check_intra4x4_pred_mode(h) < 0 ) return -1; + } else { + h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode ); + if( h->intra16x16_pred_mode < 0 ) return -1; + } + h->chroma_pred_mode_table[mb_xy] = + h->chroma_pred_mode = decode_cabac_mb_chroma_pre_mode( h ); + + h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode ); + if( h->chroma_pred_mode < 0 ) return -1; + } else if( partition_count == 4 ) { + int i, j, sub_partition_count[4], list, ref[2][4]; + + if( h->slice_type == B_TYPE ) { + for( i = 0; i < 4; i++ ) { + h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h ); + sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } + if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) + || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { + pred_direct_motion(h, &mb_type); + if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) { + for( i = 0; i < 4; i++ ) + if( IS_DIRECT(h->sub_mb_type[i]) ) + fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 ); + } + } + } else { + for( i = 0; i < 4; i++ ) { + h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h ); + sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } + } + + for( list = 0; list < 2; list++ ) { + if( h->ref_count[list] > 0 ) { + for( i = 0; i < 4; i++ ) { + if(IS_DIRECT(h->sub_mb_type[i])) continue; + 
if(IS_DIR(h->sub_mb_type[i], 0, list)){ + if( h->ref_count[list] > 1 ) + ref[list][i] = decode_cabac_mb_ref( h, list, 4*i ); + else + ref[list][i] = 0; + } else { + ref[list][i] = -1; + } + h->ref_cache[list][ scan8[4*i]+1 ]= + h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; + } + } + } + + if(dct8x8_allowed) + dct8x8_allowed = get_dct8x8_allowed(h); + + for(list=0; list<2; list++){ + for(i=0; i<4; i++){ + if(IS_DIRECT(h->sub_mb_type[i])){ + fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4); + continue; + } + h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]; + + if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ + const int sub_mb_type= h->sub_mb_type[i]; + const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1; + for(j=0; jmv_cache[list][ scan8[index] ]; + int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ]; + pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy); + + mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 ); + my = mpy + decode_cabac_mb_mvd( h, list, index, 1 ); + tprintf("final mv:%d %d\n", mx, my); + + if(IS_SUB_8X8(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= + mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= + mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my; + + mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= + mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx; + mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= + mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy; + }else if(IS_SUB_8X4(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my; + + mvd_cache[ 0 ][0]= mvd_cache[ 1 ][0]= mx- mpx; + mvd_cache[ 0 ][1]= mvd_cache[ 1 ][1]= my - mpy; + }else if(IS_SUB_4X8(sub_mb_type)){ + mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx; + mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my; + + mvd_cache[ 0 ][0]= mvd_cache[ 8 ][0]= mx - mpx; + mvd_cache[ 0 ][1]= mvd_cache[ 8 ][1]= my - 
mpy; + }else{ + assert(IS_SUB_4X4(sub_mb_type)); + mv_cache[ 0 ][0]= mx; + mv_cache[ 0 ][1]= my; + + mvd_cache[ 0 ][0]= mx - mpx; + mvd_cache[ 0 ][1]= my - mpy; + } + } + }else{ + uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0]; + uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0]; + p[0] = p[1] = p[8] = p[9] = 0; + pd[0]= pd[1]= pd[8]= pd[9]= 0; + } + } + } + } else if( IS_DIRECT(mb_type) ) { + pred_direct_motion(h, &mb_type); + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); + dct8x8_allowed &= h->sps.direct_8x8_inference_flag; + } else { + int list, mx, my, i, mpx, mpy; + if(IS_16X16(mb_type)){ + for(list=0; list<2; list++){ + if(IS_DIR(mb_type, 0, list)){ + if(h->ref_count[list] > 0 ){ + const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0; + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1); + } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); + } + for(list=0; list<2; list++){ + if(IS_DIR(mb_type, 0, list)){ + pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy); + + mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 ); + my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 ); + tprintf("final mv:%d %d\n", mx, my); + + fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4); + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); + }else + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); + } + } + else if(IS_16X8(mb_type)){ + for(list=0; list<2; list++){ + if(h->ref_count[list]>0){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + const int ref= h->ref_count[list] > 1 ? 
decode_cabac_mb_ref( h, list, 8*i ) : 0; + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1); + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); + } + } + } + for(list=0; list<2; list++){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy); + mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 ); + my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 ); + tprintf("final mv:%d %d\n", mx, my); + + fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4); + fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); + }else{ + fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); + fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); + } + } + } + }else{ + assert(IS_8X16(mb_type)); + for(list=0; list<2; list++){ + if(h->ref_count[list]>0){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ //FIXME optimize + const int ref= h->ref_count[list] > 1 ? 
decode_cabac_mb_ref( h, list, 4*i ) : 0; + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1); + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); + } + } + } + for(list=0; list<2; list++){ + for(i=0; i<2; i++){ + if(IS_DIR(mb_type, i, list)){ + pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy); + mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 ); + my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 ); + + tprintf("final mv:%d %d\n", mx, my); + fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4); + fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); + }else{ + fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); + fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); + } + } + } + } + } + + if( IS_INTER( mb_type ) ) { + h->chroma_pred_mode_table[mb_xy] = 0; + write_back_motion( h, mb_type ); + } + + if( !IS_INTRA16x16( mb_type ) ) { + cbp = decode_cabac_mb_cbp_luma( h ); + cbp |= decode_cabac_mb_cbp_chroma( h ) << 4; + } + + h->cbp_table[mb_xy] = cbp; + + if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) { + if( decode_cabac_mb_transform_size( h ) ) + mb_type |= MB_TYPE_8x8DCT; + } + s->current_picture.mb_type[mb_xy]= mb_type; + + if( cbp || IS_INTRA16x16( mb_type ) ) { + const uint8_t *scan, *scan8x8, *dc_scan; + int dqp; + + if(IS_INTERLACED(mb_type)){ + scan= s->qscale ? h->field_scan : h->field_scan_q0; + dc_scan= luma_dc_field_scan; + }else{ + scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; + dc_scan= luma_dc_zigzag_scan; + } + scan8x8= s->qscale ? 
h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; + + h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h ); + if( dqp == INT_MIN ){ + av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + s->qscale += dqp; + if(((unsigned)s->qscale) > 51){ + if(s->qscale<0) s->qscale+= 52; + else s->qscale-= 52; + } + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); + + if( IS_INTRA16x16( mb_type ) ) { + int i; + //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); + if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0) + return -1; + if( cbp&15 ) { + for( i = 0; i < 16; i++ ) { + //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); + if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ) + return -1; + } + } else { + fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); + } + } else { + int i8x8, i4x4; + for( i8x8 = 0; i8x8 < 4; i8x8++ ) { + if( cbp & (1<mb + 64*i8x8, 5, 4*i8x8, + scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) + return -1; + } else + for( i4x4 = 0; i4x4 < 4; i4x4++ ) { + const int index = 4*i8x8 + i4x4; + //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); + if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 
0:3][s->qscale], 16) < 0 ) + return -1; + } + } else { + uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; + nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; + } + } + } + + if( cbp&0x30 ){ + int c; + for( c = 0; c < 2; c++ ) { + //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); + if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0) + return -1; + } + } + + if( cbp&0x20 ) { + int c, i; + for( c = 0; c < 2; c++ ) { + for( i = 0; i < 4; i++ ) { + const int index = 16 + 4 * c + i; + //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); + if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0) + return -1; + } + } + } else { + uint8_t * const nnz= &h->non_zero_count_cache[0]; + nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = + nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; + } + } else { + uint8_t * const nnz= &h->non_zero_count_cache[0]; + fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); + nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = + nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; + } + + s->current_picture.qscale_table[mb_xy]= s->qscale; + write_back_non_zero_count(h); + + return 0; +} + + +/* Deblocks one luma edge whose two sides are horizontally adjacent: p samples are read at pix[-1..-4], q samples at pix[0..3], and rows advance by stride. alpha and beta are looked up from qp plus the slice offsets, clipped to 0..51. When bS[0] < 4 the work is delegated to dsp.h264_h_loop_filter_luma with tc[i] taken from tc0_table (-1 where bS[i] is 0); otherwise the bS=4 filter is applied inline to 16 rows. NOTE(review): the tprintf in the bS=4 branch reads i, which is only assigned in the other branch -- confirm. */ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { + int i, d; + const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + + if( bS[0] < 4 ) { + int8_t tc[4]; + for(i=0; i<4; i++) + tc[i] = bS[i] ? 
tc0_table[index_a][bS[i] - 1] : -1; + h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); + } else { + /* 16px edge length, because bS=4 is triggered by being at + * the edge of an intra MB, so all 4 bS are the same */ + for( d = 0; d < 16; d++ ) { + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int p2 = pix[-3]; + + const int q0 = pix[0]; + const int q1 = pix[1]; + const int q2 = pix[2]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( ABS( p2 - p0 ) < beta) + { + const int p3 = pix[-4]; + /* p0', p1', p2' */ + pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( ABS( q2 - q0 ) < beta) + { + const int q3 = pix[3]; + /* q0', q1', q2' */ + pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]); + } + pix += stride; + } + } +} +/* Chroma counterpart of filter_mb_edgev. It performs the same alpha/beta lookup, then calls dsp.h264_h_loop_filter_chroma with tc[i] = tc0_table entry + 1 (0 where bS[i] is 0) when bS[0] < 4, or dsp.h264_h_loop_filter_chroma_intra otherwise. */ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { + int i; + const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + + if( bS[0] < 4 ) { + int8_t tc[4]; + for(i=0; i<4; i++) + tc[i] = bS[i] ? 
tc0_table[index_a][bS[i] - 1] + 1 : 0; + h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); + } else { + h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); + } +} + +/* Row-by-row luma filter that filter_mb uses for the first vertical edge of an MBAFF macroblock. For each of 16 rows it selects a bS entry (i>>1, with the low bit replaced by i&1 when h->mb_field_decoding_flag is set) and one of the two qp values, skips the row when that bS is 0, and applies either the tc0-clipped filter (bS < 4) or the stronger averaging filter inline. */ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { + int i; + for( i = 0; i < 16; i++, pix += stride) { + int index_a; + int alpha; + int beta; + + int qp_index; + int bS_index = (i >> 1); + if (h->mb_field_decoding_flag) { + bS_index &= ~1; + bS_index |= (i & 1); + } + + if( bS[bS_index] == 0 ) { + continue; + } + + qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3); + index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); + alpha = alpha_table[index_a]; + beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + + + if( bS[bS_index] < 4 ) { + const int tc0 = tc0_table[index_a][bS[bS_index] - 1]; + /* 4px edge length */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int p2 = pix[-3]; + const int q0 = pix[0]; + const int q1 = pix[1]; + const int q2 = pix[2]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + int tc = tc0; + int i_delta; + + if( ABS( p2 - p0 ) < beta ) { + pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); + tc++; + } + if( ABS( q2 - q0 ) < beta ) { + pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); + tc++; + } + + i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); + } + }else{ + /* 4px edge length */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const 
int p2 = pix[-3]; + + const int q0 = pix[0]; + const int q1 = pix[1]; + const int q2 = pix[2]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( ABS( p2 - p0 ) < beta) + { + const int p3 = pix[-4]; + /* p0', p1', p2' */ + pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( ABS( q2 - q0 ) < beta) + { + const int q3 = pix[3]; + /* q0', q1', q2' */ + pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); + } + } + } +} +/* Chroma version of filter_mb_mbaff_edgev. It walks 8 rows, using bS[i] for row i, and skips rows whose bS is 0. For bS < 4 it applies the clipped delta filter with tc = tc0_table entry + 1; otherwise it applies the plain p0/q0 averaging. NOTE(review): the parameter is declared bS[4] but is indexed 0..7, and (i >> 3) is always 0 for i < 8, so qp[1] is only reachable in the field case -- confirm both are intended. */ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) { + int i; + for( i = 0; i < 8; i++, pix += stride) { + int index_a; + int alpha; + int beta; + + int qp_index; + int bS_index = i; + + if( bS[bS_index] == 0 ) { + continue; + } + + qp_index = h->mb_field_decoding_flag ? 
(i & 1) : (i >> 3); + index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); + alpha = alpha_table[index_a]; + beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + if( bS[bS_index] < 4 ) { + const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1; + /* 2px edge length (because we use same bS than the one for luma) */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int q0 = pix[0]; + const int q1 = pix[1]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + + pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); + } + }else{ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int q0 = pix[0]; + const int q1 = pix[1]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); + } + } + } +} + +/* Deblocks one luma edge whose two sides are vertically adjacent: p samples are read at pix[-1*stride..-4*stride], q samples at pix[0..3*stride], and columns advance with pix++. alpha and beta are looked up from qp plus the slice offsets, clipped to 0..51. When bS[0] < 4 the work is delegated to dsp.h264_v_loop_filter_luma with tc[i] from tc0_table (-1 where bS[i] is 0); otherwise the bS=4 filter runs inline over 16 columns. NOTE(review): the tprintf in the bS=4 branch reads i and bS[i], but i is only assigned in the other branch -- confirm. */ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { + int i, d; + const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + const int pix_next = stride; + + if( bS[0] < 4 ) { + int8_t tc[4]; + for(i=0; i<4; i++) + tc[i] = bS[i] ? 
tc0_table[index_a][bS[i] - 1] : -1; + h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); + } else { + /* 16px edge length, see filter_mb_edgev */ + for( d = 0; d < 16; d++ ) { + const int p0 = pix[-1*pix_next]; + const int p1 = pix[-2*pix_next]; + const int p2 = pix[-3*pix_next]; + const int q0 = pix[0]; + const int q1 = pix[1*pix_next]; + const int q2 = pix[2*pix_next]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + const int p3 = pix[-4*pix_next]; + const int q3 = pix[ 3*pix_next]; + + if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( ABS( p2 - p0 ) < beta) { + /* p0', p1', p2' */ + pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( ABS( q2 - q0 ) < beta) { + /* q0', q1', q2' */ + pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); + } + pix++; + } + } +} + +/* Chroma counterpart of filter_mb_edgeh. It performs the same alpha/beta lookup, then calls dsp.h264_v_loop_filter_chroma with tc[i] = tc0_table entry + 1 (0 where bS[i] is 0) when bS[0] < 4, or dsp.h264_v_loop_filter_chroma_intra otherwise. */ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { + int i; + const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); + const int alpha = alpha_table[index_a]; + const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + + if( bS[0] < 4 ) { + int8_t tc[4]; + for(i=0; i<4; i++) + tc[i] = 
bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; + h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); + } else { + h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); + } +} + +/* Deblocking driver for the macroblock at (mb_x, mb_y). Outside MBAFF it returns early when the macroblock qp, and its rounded average with the left and top neighbours, are all <= a threshold derived from slice_alpha_c0_offset and chroma_qp_index_offset. In MBAFF frames with a differently-interlaced left neighbour it first filters the left edge using 8 bS values and 2 qp values. It then loops dir 0 (vertical edges) and dir 1 (horizontal edges), computing bS[4] per edge (3 or 4 when either side is intra, 2 for non-zero coefficients, 1 for differing reference or an mv component difference >= 4, else 0) and calling the luma and chroma edge filters with the averaged qp. */ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { + MpegEncContext * const s = &h->s; + const int mb_xy= mb_x + mb_y*s->mb_stride; + int first_vertical_edge_done = 0; + int dir; + /* FIXME: A given frame may occupy more than one position in + * the reference list. So ref2frm should be populated with + * frame numbers, not indices. */ + static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + //for sufficiently low qp, filtering wouldn't do anything + //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp + if(!h->mb_aff_frame){ + int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset); + int qp = s->current_picture.qscale_table[mb_xy]; + if(qp <= qp_thresh + && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) + && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ + return; + } + } + + if (h->mb_aff_frame + // left mb is in picture + && h->slice_table[mb_xy-1] != 255 + // and current and left pair do not have the same interlaced type + && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1])) + // and left mb is in the same slice if deblocking_filter == 2 + && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) { + /* First vertical edge is different in MBAFF frames + * There are 8 different bS to compute and 2 different Qp + */ + int bS[8]; + int qp[2]; + int chroma_qp[2]; + + int i; + first_vertical_edge_done = 1; + for( i = 0; i < 8; i++ ) { + int y = i>>1; + int b_idx= 8 + 4 + 8*y; + int bn_idx= b_idx - 1; + 
+ int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1]; + + if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || + IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { + bS[i] = 4; + } else if( h->non_zero_count_cache[b_idx] != 0 || + /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */ + h->non_zero_count_cache[bn_idx] != 0 ) { + bS[i] = 2; + } else { + int l; + bS[i] = 0; + for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { + if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || + ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { + bS[i] = 1; + break; + } + } + } + } + if(bS[0]+bS[1]+bS[2]+bS[3] != 0) { + // Do not use s->qscale as luma quantizer because it has not the same + // value in IPCM macroblocks. + qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1; + chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1; + qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1; + chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1; + + /* Filter edge */ + tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize); + { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); + filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp ); + filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp 
); + } + } + /* dir : 0 -> vertical edge, 1 -> horizontal edge */ + for( dir = 0; dir < 2; dir++ ) + { + int edge; + const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; + const int mb_type = s->current_picture.mb_type[mb_xy]; + const int mbm_type = s->current_picture.mb_type[mbm_xy]; + int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; + + const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) + == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; + // how often to recheck mv-based bS when iterating between edges + const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 : + (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0; + // how often to recheck mv-based bS when iterating along each edge + const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)); + + if (first_vertical_edge_done) { + start = 1; + first_vertical_edge_done = 0; + } + + if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy]) + start = 1; + + /* Calculate bS */ + for( edge = start; edge < edges; edge++ ) { + /* mbn_xy: neighbor macroblock */ + const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; + const int mbn_type = s->current_picture.mb_type[mbn_xy]; + int bS[4]; + int qp; + + if( (edge&1) && IS_8x8DCT(mb_type) ) + continue; + + if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0) + && !IS_INTERLACED(mb_type) + && IS_INTERLACED(mbn_type) + ) { + // This is a special case in the norm where the filtering must + // be done twice (one each of the field) even if we are in a + // frame macroblock. 
+ // + unsigned int tmp_linesize = 2 * linesize; + unsigned int tmp_uvlinesize = 2 * uvlinesize; + int mbn_xy = mb_xy - 2 * s->mb_stride; + int qp, chroma_qp; + + // first filtering + if( IS_INTRA(mb_type) || + IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { + bS[0] = bS[1] = bS[2] = bS[3] = 3; + } else { + // TODO + av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); + } + /* Filter edge */ + // Do not use s->qscale as luma quantizer because it has not the same + // value in IPCM macroblocks. + qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp ); + chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp ); + + // second filtering + mbn_xy += s->mb_stride; + if( IS_INTRA(mb_type) || + IS_INTRA(mbn_type) ) { + bS[0] = bS[1] = bS[2] = bS[3] = 3; + } else { + // TODO + av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); + } + /* Filter edge */ + // Do not use s->qscale as luma quantizer because it has not the same + // value in IPCM macroblocks. 
+ qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp ); + chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + continue; + } + if( IS_INTRA(mb_type) || + IS_INTRA(mbn_type) ) { + int value; + if (edge == 0) { + if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type)) + || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0)) + ) { + value = 4; + } else { + value = 3; + } + } else { + value = 3; + } + bS[0] = bS[1] = bS[2] = bS[3] = value; + } else { + int i, l; + int mv_done; + + if( edge & mask_edge ) { + bS[0] = bS[1] = bS[2] = bS[3] = 0; + mv_done = 1; + } + else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { + int b_idx= 8 + 4 + edge * (dir ? 8:1); + int bn_idx= b_idx - (dir ? 8:1); + int v = 0; + for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) { + v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || + ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4; + } + bS[0] = bS[1] = bS[2] = bS[3] = v; + mv_done = 1; + } + else + mv_done = 0; + + for( i = 0; i < 4; i++ ) { + int x = dir == 0 ? edge : i; + int y = dir == 0 ? i : edge; + int b_idx= 8 + 4 + x + 8*y; + int bn_idx= b_idx - (dir ? 
8:1); + + if( h->non_zero_count_cache[b_idx] != 0 || + h->non_zero_count_cache[bn_idx] != 0 ) { + bS[i] = 2; + } + else if(!mv_done) + { + bS[i] = 0; + for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { + if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || + ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { + bS[i] = 1; + break; + } + } + } + } + + if(bS[0]+bS[1]+bS[2]+bS[3] == 0) + continue; + } + + /* Filter edge */ + // Do not use s->qscale as luma quantizer because it has not the same + // value in IPCM macroblocks. + qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]); + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + if( dir == 0 ) { + filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp ); + if( (edge&1) == 0 ) { + int chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp ); + filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp ); + } + } else { + filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp ); + if( (edge&1) == 0 ) { + int chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp ); + } + } + } + } +} + +/* Decodes the macroblocks of the current slice. With pps.cabac set it byte-aligns the reader, initialises the CABAC decoder and the 460 context states from the init tables and s->qscale, then repeats decode_mb_cabac + hl_decode_mb until the terminate bin is read or mb_y reaches mb_height. Otherwise it repeats decode_mb_cavlc + hl_decode_mb until the picture or the bitstream ends. Each exit reports the decoded range through ff_er_add_slice; the function returns 0 on a clean slice end and -1 on error. */ static int decode_slice(H264Context *h){ + MpegEncContext * const s = &h->s; + const int 
part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F; + + s->mb_skip_run= -1; + + if( h->pps.cabac ) { + int i; + + /* realign */ + align_get_bits( &s->gb ); + + /* init cabac */ + ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 ); + ff_init_cabac_decoder( &h->cabac, + s->gb.buffer + get_bits_count(&s->gb)/8, + ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8); + /* calculate pre-state */ + for( i= 0; i < 460; i++ ) { + int pre; + if( h->slice_type == I_TYPE ) + pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 ); + else + pre = clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 ); + + if( pre <= 63 ) + h->cabac_state[i] = 2 * ( 63 - pre ) + 0; + else + h->cabac_state[i] = 2 * ( pre - 64 ) + 1; + } + + for(;;){ + int ret = decode_mb_cabac(h); + int eos; + + if(ret>=0) hl_decode_mb(h); + + /* XXX: useless as decode_mb_cabac it doesn't support that ... */ + if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ? 
+ s->mb_y++; + + if(ret>=0) ret = decode_mb_cabac(h); + + if(ret>=0) hl_decode_mb(h); + s->mb_y--; + } + eos = get_cabac_terminate( &h->cabac ); + + if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) { + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + return -1; + } + + if( ++s->mb_x >= s->mb_width ) { + s->mb_x = 0; + ff_draw_horiz_band(s, 16*s->mb_y, 16); + ++s->mb_y; + if(h->mb_aff_frame) { + ++s->mb_y; + } + } + + if( eos || s->mb_y >= s->mb_height ) { + tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + return 0; + } + } + + } else { + for(;;){ + int ret = decode_mb_cavlc(h); + + if(ret>=0) hl_decode_mb(h); + + if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ? 
+ s->mb_y++; + ret = decode_mb_cavlc(h); + + if(ret>=0) hl_decode_mb(h); + s->mb_y--; + } + + if(ret<0){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + + return -1; + } + + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; + ff_draw_horiz_band(s, 16*s->mb_y, 16); + ++s->mb_y; + if(h->mb_aff_frame) { + ++s->mb_y; + } + if(s->mb_y >= s->mb_height){ + tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); + + if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return 0; + }else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return -1; + } + } + } + + if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ + tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); + if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return 0; + }else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + + return -1; + } + } + } + } + +#if 0 + for(;s->mb_y < s->mb_height; s->mb_y++){ + for(;s->mb_x < s->mb_width; s->mb_x++){ + int ret= decode_mb(h); + + hl_decode_mb(h); + + if(ret<0){ + av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + + return -1; + } + + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; + if(++s->mb_y >= s->mb_height){ + if(get_bits_count(s->gb) == s->gb.size_in_bits){ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return 0; + 
}else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return -1; + } + } + } + + if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){ + if(get_bits_count(s->gb) == s->gb.size_in_bits){ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); + + return 0; + }else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); + + return -1; + } + } + } + s->mb_x=0; + ff_draw_horiz_band(s, 16*s->mb_y, 16); + } +#endif + return -1; //not reached +} + +static int decode_unregistered_user_data(H264Context *h, int size){ + MpegEncContext * const s = &h->s; + uint8_t user_data[16+256]; + int e, build, i; + + if(size<16) + return -1; + + for(i=0; igb, 8); + } + + user_data[i]= 0; + e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build); + if(e==1 && build>=0) + h->x264_build= build; + + if(s->avctx->debug & FF_DEBUG_BUGS) + av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16); + + for(; igb, 8); + + return 0; +} + +static int decode_sei(H264Context *h){ + MpegEncContext * const s = &h->s; + + while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){ + int size, type; + + type=0; + do{ + type+= show_bits(&s->gb, 8); + }while(get_bits(&s->gb, 8) == 255); + + size=0; + do{ + size+= show_bits(&s->gb, 8); + }while(get_bits(&s->gb, 8) == 255); + + switch(type){ + case 5: + if(decode_unregistered_user_data(h, size) < 0); + return -1; + break; + default: + skip_bits(&s->gb, 8*size); + } + + //FIXME check bits here + align_get_bits(&s->gb); + } + + return 0; +} + +static inline void decode_hrd_parameters(H264Context *h, SPS *sps){ + MpegEncContext * const s = &h->s; + int cpb_count, i; + cpb_count = get_ue_golomb(&s->gb) + 1; + get_bits(&s->gb, 4); /* bit_rate_scale */ + get_bits(&s->gb, 4); /* cpb_size_scale */ + for(i=0; igb); /* 
bit_rate_value_minus1 */ + get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */ + get_bits1(&s->gb); /* cbr_flag */ + } + get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */ + get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */ + get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */ + get_bits(&s->gb, 5); /* time_offset_length */ +} + +static inline int decode_vui_parameters(H264Context *h, SPS *sps){ + MpegEncContext * const s = &h->s; + int aspect_ratio_info_present_flag, aspect_ratio_idc; + int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag; + + aspect_ratio_info_present_flag= get_bits1(&s->gb); + + if( aspect_ratio_info_present_flag ) { + aspect_ratio_idc= get_bits(&s->gb, 8); + if( aspect_ratio_idc == EXTENDED_SAR ) { + sps->sar.num= get_bits(&s->gb, 16); + sps->sar.den= get_bits(&s->gb, 16); + }else if(aspect_ratio_idc < 14){ + sps->sar= pixel_aspect[aspect_ratio_idc]; + }else{ + av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n"); + return -1; + } + }else{ + sps->sar.num= + sps->sar.den= 0; + } +// s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height); + + if(get_bits1(&s->gb)){ /* overscan_info_present_flag */ + get_bits1(&s->gb); /* overscan_appropriate_flag */ + } + + if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */ + get_bits(&s->gb, 3); /* video_format */ + get_bits1(&s->gb); /* video_full_range_flag */ + if(get_bits1(&s->gb)){ /* colour_description_present_flag */ + get_bits(&s->gb, 8); /* colour_primaries */ + get_bits(&s->gb, 8); /* transfer_characteristics */ + get_bits(&s->gb, 8); /* matrix_coefficients */ + } + } + + if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */ + get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */ + get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */ + } + + sps->timing_info_present_flag = get_bits1(&s->gb); + if(sps->timing_info_present_flag){ + sps->num_units_in_tick = get_bits_long(&s->gb, 32); + 
sps->time_scale = get_bits_long(&s->gb, 32); + sps->fixed_frame_rate_flag = get_bits1(&s->gb); + } + + nal_hrd_parameters_present_flag = get_bits1(&s->gb); + if(nal_hrd_parameters_present_flag) + decode_hrd_parameters(h, sps); + vcl_hrd_parameters_present_flag = get_bits1(&s->gb); + if(vcl_hrd_parameters_present_flag) + decode_hrd_parameters(h, sps); + if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag) + get_bits1(&s->gb); /* low_delay_hrd_flag */ + get_bits1(&s->gb); /* pic_struct_present_flag */ + + sps->bitstream_restriction_flag = get_bits1(&s->gb); + if(sps->bitstream_restriction_flag){ + get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */ + get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */ + get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */ + get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */ + get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */ + sps->num_reorder_frames = get_ue_golomb(&s->gb); + get_ue_golomb(&s->gb); /* max_dec_frame_buffering */ + } + + return 0; +} + +static void decode_scaling_list(H264Context *h, uint8_t *factors, int size, + const uint8_t *jvt_list, const uint8_t *fallback_list){ + MpegEncContext * const s = &h->s; + int i, last = 8, next = 8; + const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8; + if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */ + memcpy(factors, fallback_list, size*sizeof(uint8_t)); + else + for(i=0;igb)) & 0xff; + if(!i && !next){ /* matrix not written, we use the preset one */ + memcpy(factors, jvt_list, size*sizeof(uint8_t)); + break; + } + last = factors[scan[i]] = next ? next : last; + } +} + +static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps, + uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){ + MpegEncContext * const s = &h->s; + int fallback_sps = !is_sps && sps->scaling_matrix_present; + const uint8_t *fallback[4] = { + fallback_sps ? 
sps->scaling_matrix4[0] : default_scaling4[0], + fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1], + fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0], + fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1] + }; + if(get_bits1(&s->gb)){ + sps->scaling_matrix_present |= is_sps; + decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y + decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr + decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb + decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y + decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr + decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb + if(is_sps || pps->transform_8x8_mode){ + decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y + decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y + } + } else if(fallback_sps) { + memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t)); + memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t)); + } +} + +static inline int decode_seq_parameter_set(H264Context *h){ + MpegEncContext * const s = &h->s; + int profile_idc, level_idc; + int sps_id, i; + SPS *sps; + + profile_idc= get_bits(&s->gb, 8); + get_bits1(&s->gb); //constraint_set0_flag + get_bits1(&s->gb); //constraint_set1_flag + get_bits1(&s->gb); //constraint_set2_flag + get_bits1(&s->gb); //constraint_set3_flag + get_bits(&s->gb, 4); // reserved + level_idc= get_bits(&s->gb, 8); + sps_id= get_ue_golomb(&s->gb); + + sps= &h->sps_buffer[ sps_id ]; + sps->profile_idc= profile_idc; + sps->level_idc= level_idc; + + if(sps->profile_idc >= 100){ //high profile + if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc + get_bits1(&s->gb); 
//residual_color_transform_flag + get_ue_golomb(&s->gb); //bit_depth_luma_minus8 + get_ue_golomb(&s->gb); //bit_depth_chroma_minus8 + sps->transform_bypass = get_bits1(&s->gb); + decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8); + }else + sps->scaling_matrix_present = 0; + + sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; + sps->poc_type= get_ue_golomb(&s->gb); + + if(sps->poc_type == 0){ //FIXME #define + sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4; + } else if(sps->poc_type == 1){//FIXME #define + sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb); + sps->offset_for_non_ref_pic= get_se_golomb(&s->gb); + sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb); + sps->poc_cycle_length= get_ue_golomb(&s->gb); + + for(i=0; ipoc_cycle_length; i++) + sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb); + } + if(sps->poc_type > 2){ + av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type); + return -1; + } + + sps->ref_frame_count= get_ue_golomb(&s->gb); + if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){ + av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n"); + } + sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb); + sps->mb_width= get_ue_golomb(&s->gb) + 1; + sps->mb_height= get_ue_golomb(&s->gb) + 1; + if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 || + avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)) + return -1; + + sps->frame_mbs_only_flag= get_bits1(&s->gb); + if(!sps->frame_mbs_only_flag) + sps->mb_aff= get_bits1(&s->gb); + else + sps->mb_aff= 0; + + sps->direct_8x8_inference_flag= get_bits1(&s->gb); + + sps->crop= get_bits1(&s->gb); + if(sps->crop){ + sps->crop_left = get_ue_golomb(&s->gb); + sps->crop_right = get_ue_golomb(&s->gb); + sps->crop_top = get_ue_golomb(&s->gb); + sps->crop_bottom= get_ue_golomb(&s->gb); + if(sps->crop_left || sps->crop_top){ + av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely 
supported, this could look slightly wrong ...\n"); + } + }else{ + sps->crop_left = + sps->crop_right = + sps->crop_top = + sps->crop_bottom= 0; + } + + sps->vui_parameters_present_flag= get_bits1(&s->gb); + if( sps->vui_parameters_present_flag ) + decode_vui_parameters(h, sps); + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n", + sps_id, sps->profile_idc, sps->level_idc, + sps->poc_type, + sps->ref_frame_count, + sps->mb_width, sps->mb_height, + sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"), + sps->direct_8x8_inference_flag ? "8B8" : "", + sps->crop_left, sps->crop_right, + sps->crop_top, sps->crop_bottom, + sps->vui_parameters_present_flag ? "VUI" : "" + ); + } + return 0; +} + +static inline int decode_picture_parameter_set(H264Context *h, int bit_length){ + MpegEncContext * const s = &h->s; + int pps_id= get_ue_golomb(&s->gb); + PPS *pps= &h->pps_buffer[pps_id]; + + pps->sps_id= get_ue_golomb(&s->gb); + pps->cabac= get_bits1(&s->gb); + pps->pic_order_present= get_bits1(&s->gb); + pps->slice_group_count= get_ue_golomb(&s->gb) + 1; + if(pps->slice_group_count > 1 ){ + pps->mb_slice_group_map_type= get_ue_golomb(&s->gb); + av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n"); + switch(pps->mb_slice_group_map_type){ + case 0: +#if 0 +| for( i = 0; i <= num_slice_groups_minus1; i++ ) | | | +| run_length[ i ] |1 |ue(v) | +#endif + break; + case 2: +#if 0 +| for( i = 0; i < num_slice_groups_minus1; i++ ) | | | +|{ | | | +| top_left_mb[ i ] |1 |ue(v) | +| bottom_right_mb[ i ] |1 |ue(v) | +| } | | | +#endif + break; + case 3: + case 4: + case 5: +#if 0 +| slice_group_change_direction_flag |1 |u(1) | +| slice_group_change_rate_minus1 |1 |ue(v) | +#endif + break; + case 6: +#if 0 +| slice_group_id_cnt_minus1 |1 |ue(v) | +| for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | | +|) | | | +| slice_group_id[ i ] |1 |u(v) | +#endif + break; + } + } + 
pps->ref_count[0]= get_ue_golomb(&s->gb) + 1; + pps->ref_count[1]= get_ue_golomb(&s->gb) + 1; + if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){ + av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n"); + return -1; + } + + pps->weighted_pred= get_bits1(&s->gb); + pps->weighted_bipred_idc= get_bits(&s->gb, 2); + pps->init_qp= get_se_golomb(&s->gb) + 26; + pps->init_qs= get_se_golomb(&s->gb) + 26; + pps->chroma_qp_index_offset= get_se_golomb(&s->gb); + pps->deblocking_filter_parameters_present= get_bits1(&s->gb); + pps->constrained_intra_pred= get_bits1(&s->gb); + pps->redundant_pic_cnt_present = get_bits1(&s->gb); + + memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t)); + memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t)); + + if(get_bits_count(&s->gb) < bit_length){ + pps->transform_8x8_mode= get_bits1(&s->gb); + decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8); + get_se_golomb(&s->gb); //second_chroma_qp_index_offset + } + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", + pps_id, pps->sps_id, + pps->cabac ? "CABAC" : "CAVLC", + pps->slice_group_count, + pps->ref_count[0], pps->ref_count[1], + pps->weighted_pred ? "weighted" : "", + pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset, + pps->deblocking_filter_parameters_present ? "LPAR" : "", + pps->constrained_intra_pred ? "CONSTR" : "", + pps->redundant_pic_cnt_present ? "REDU" : "", + pps->transform_8x8_mode ? "8x8DCT" : "" + ); + } + + return 0; +} + +/** + * finds the end of the current frame in the bitstream. 
+ * @return the position of the first byte of the next frame, or -1 + */ +static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){ + int i; + uint32_t state; + ParseContext *pc = &(h->s.parse_context); +//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]); +// mb_addr= pc->mb_addr - 1; + state= pc->state; + for(i=0; i<=buf_size; i++){ + if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){ + tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i); + if(pc->frame_start_found){ + // If there isn't one more byte in the buffer + // the test on first_mb_in_slice cannot be done yet + // do it at next call. + if (i >= buf_size) break; + if (buf[i] & 0x80) { + // first_mb_in_slice is 0, probably the first nal of a new + // slice + tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i); + pc->state=-1; + pc->frame_start_found= 0; + return i-4; + } + } + pc->frame_start_found = 1; + } + if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){ + if(pc->frame_start_found){ + pc->state=-1; + pc->frame_start_found= 0; + return i-4; + } + } + if (istate= state; + return END_NOT_FOUND; +} + +static int h264_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + H264Context *h = s->priv_data; + ParseContext *pc = &h->s.parse_context; + int next; + + next= find_frame_end(h, buf, buf_size); + + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} + +static int h264_split(AVCodecContext *avctx, + const uint8_t *buf, int buf_size) +{ + int i; + uint32_t state = -1; + int has_sps= 0; + + for(i=0; i<=buf_size; i++){ + if((state&0xFFFFFF1F) == 0x107) + has_sps=1; 
+/* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){ + }*/ + if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){ + if(has_sps){ + while(i>4 && buf[i-5]==0) i--; + return i-4; + } + } + if (is; + AVCodecContext * const avctx= s->avctx; + int buf_index=0; +#if 0 + int i; + for(i=0; i<50; i++){ + av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]); + } +#endif + h->slice_num = 0; + s->current_picture_ptr= NULL; + for(;;){ + int consumed; + int dst_length; + int bit_length; + uint8_t *ptr; + int i, nalsize = 0; + + if(h->is_avc) { + if(buf_index >= buf_size) break; + nalsize = 0; + for(i = 0; i < h->nal_length_size; i++) + nalsize = (nalsize << 8) | buf[buf_index++]; + if(nalsize <= 1){ + if(nalsize == 1){ + buf_index++; + continue; + }else{ + av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); + break; + } + } + } else { + // start code prefix search + for(; buf_index + 3 < buf_size; buf_index++){ + // this should allways succeed in the first iteration + if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1) + break; + } + + if(buf_index+3 >= buf_size) break; + + buf_index+=3; + } + + ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? 
nalsize : buf_size - buf_index); + if(ptr[dst_length - 1] == 0) dst_length--; + bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1); + + if(s->avctx->debug&FF_DEBUG_STARTCODE){ + av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length); + } + + if (h->is_avc && (nalsize != consumed)) + av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize); + + buf_index += consumed; + + if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id + ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)) + continue; + + switch(h->nal_unit_type){ + case NAL_IDR_SLICE: + idr(h); //FIXME ensure we don't loose some frames if there is reordering + case NAL_SLICE: + init_get_bits(&s->gb, ptr, bit_length); + h->intra_gb_ptr= + h->inter_gb_ptr= &s->gb; + s->data_partitioning = 0; + + if(decode_slice_header(h) < 0){ + av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); + break; + } + s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE); + if(h->redundant_pic_count==0 && s->hurry_up < 5 + && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) + && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) + && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE) + && avctx->skip_frame < AVDISCARD_ALL) + decode_slice(h); + break; + case NAL_DPA: + init_get_bits(&s->gb, ptr, bit_length); + h->intra_gb_ptr= + h->inter_gb_ptr= NULL; + s->data_partitioning = 1; + + if(decode_slice_header(h) < 0){ + av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); + } + break; + case NAL_DPB: + init_get_bits(&h->intra_gb, ptr, bit_length); + h->intra_gb_ptr= &h->intra_gb; + break; + case NAL_DPC: + init_get_bits(&h->inter_gb, ptr, bit_length); + h->inter_gb_ptr= &h->inter_gb; + + if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning + && s->hurry_up < 5 + && (avctx->skip_frame < 
AVDISCARD_NONREF || h->nal_ref_idc) + && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) + && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE) + && avctx->skip_frame < AVDISCARD_ALL) + decode_slice(h); + break; + case NAL_SEI: + init_get_bits(&s->gb, ptr, bit_length); + decode_sei(h); + break; + case NAL_SPS: + init_get_bits(&s->gb, ptr, bit_length); + decode_seq_parameter_set(h); + + if(s->flags& CODEC_FLAG_LOW_DELAY) + s->low_delay=1; + + if(avctx->has_b_frames < 2) + avctx->has_b_frames= !s->low_delay; + break; + case NAL_PPS: + init_get_bits(&s->gb, ptr, bit_length); + + decode_picture_parameter_set(h, bit_length); + + break; + case NAL_AUD: + case NAL_END_SEQUENCE: + case NAL_END_STREAM: + case NAL_FILLER_DATA: + case NAL_SPS_EXT: + case NAL_AUXILIARY_SLICE: + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); + } + } + + if(!s->current_picture_ptr) return buf_index; //no frame + + s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; + s->current_picture_ptr->pict_type= s->pict_type; + + h->prev_frame_num_offset= h->frame_num_offset; + h->prev_frame_num= h->frame_num; + if(s->current_picture_ptr->reference){ + h->prev_poc_msb= h->poc_msb; + h->prev_poc_lsb= h->poc_lsb; + } + if(s->current_picture_ptr->reference) + execute_ref_pic_marking(h, h->mmco, h->mmco_index); + + ff_er_frame_end(s); + + MPV_frame_end(s); + + return buf_index; +} + +/** + * returns the number of bytes consumed for building the current frame + */ +static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){ + if(s->flags&CODEC_FLAG_TRUNCATED){ + pos -= s->parse_context.last_index; + if(pos<0) pos=0; // FIXME remove (unneeded?) + + return pos; + }else{ + if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...) 
+ if(pos+10>buf_size) pos=buf_size; // oops ;) + + return pos; + } +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + H264Context *h = avctx->priv_data; + MpegEncContext *s = &h->s; + AVFrame *pict = data; + int buf_index; + + s->flags= avctx->flags; + s->flags2= avctx->flags2; + + /* no supplementary picture */ + if (buf_size == 0) { + return 0; + } + + if(s->flags&CODEC_FLAG_TRUNCATED){ + int next= find_frame_end(h, buf, buf_size); + + if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 ) + return buf_size; +//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index); + } + + if(h->is_avc && !h->got_avcC) { + int i, cnt, nalsize; + unsigned char *p = avctx->extradata; + if(avctx->extradata_size < 7) { + av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); + return -1; + } + if(*p != 1) { + av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p); + return -1; + } + /* sps and pps in the avcC always have length coded with 2 bytes, + so put a fake nal_length_size = 2 while parsing them */ + h->nal_length_size = 2; + // Decode sps from avcC + cnt = *(p+5) & 0x1f; // Number of sps + p += 6; + for (i = 0; i < cnt; i++) { + nalsize = BE_16(p) + 2; + if(decode_nal_units(h, p, nalsize) < 0) { + av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); + return -1; + } + p += nalsize; + } + // Decode pps from avcC + cnt = *(p++); // Number of pps + for (i = 0; i < cnt; i++) { + nalsize = BE_16(p) + 2; + if(decode_nal_units(h, p, nalsize) != nalsize) { + av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); + return -1; + } + p += nalsize; + } + // Now store right nal length size, that will be use to parse all other nals + h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; + // Do not reparse avcC + h->got_avcC = 1; + } + + if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){ + if(decode_nal_units(h, 
s->avctx->extradata, s->avctx->extradata_size) < 0) + return -1; + } + + buf_index=decode_nal_units(h, buf, buf_size); + if(buf_index < 0) + return -1; + + //FIXME do something with unavailable reference frames + +// if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size); + if(!s->current_picture_ptr){ + av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n"); + return -1; + } + + { + Picture *out = s->current_picture_ptr; +#if 0 //decode order + *data_size = sizeof(AVFrame); +#else + /* Sort B-frames into display order */ + Picture *cur = s->current_picture_ptr; + Picture *prev = h->delayed_output_pic; + int out_idx = 0; + int pics = 0; + int out_of_order; + int cross_idr = 0; + int dropped_frame = 0; + int i; + + if(h->sps.bitstream_restriction_flag + && s->avctx->has_b_frames < h->sps.num_reorder_frames){ + s->avctx->has_b_frames = h->sps.num_reorder_frames; + s->low_delay = 0; + } + + while(h->delayed_pic[pics]) pics++; + h->delayed_pic[pics++] = cur; + if(cur->reference == 0) + cur->reference = 1; + + for(i=0; h->delayed_pic[i]; i++) + if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0) + cross_idr = 1; + + out = h->delayed_pic[0]; + for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++) + if(h->delayed_pic[i]->poc < out->poc){ + out = h->delayed_pic[i]; + out_idx = i; + } + + out_of_order = !cross_idr && prev && out->poc < prev->poc; + if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) + { } + else if(prev && pics <= s->avctx->has_b_frames) + out = prev; + else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15) + || (s->low_delay && + ((!cross_idr && prev && out->poc > prev->poc + 2) + || cur->pict_type == B_TYPE))) + { + s->low_delay = 0; + s->avctx->has_b_frames++; + out = prev; + } + else if(out_of_order) + out = prev; + + if(out_of_order || pics > s->avctx->has_b_frames){ + dropped_frame = (out != h->delayed_pic[out_idx]); + for(i=out_idx; h->delayed_pic[i]; 
i++) + h->delayed_pic[i] = h->delayed_pic[i+1]; + } + + if(prev == out && !dropped_frame) + *data_size = 0; + else + *data_size = sizeof(AVFrame); + if(prev && prev != out && prev->reference == 1) + prev->reference = 0; + h->delayed_output_pic = out; +#endif + + if(out) + *pict= *(AVFrame*)out; + else + av_log(avctx, AV_LOG_DEBUG, "no picture\n"); + } + + assert(pict->data[0] || !*data_size); + ff_print_debug_info(s, pict); +//printf("out %d\n", (int)pict->data[0]); +#if 0 //? + + /* Return the Picture timestamp as the frame number */ + /* we substract 1 because it is added on utils.c */ + avctx->frame_number = s->picture_number - 1; +#endif + return get_consumed_bytes(s, buf_index, buf_size); +} +#if 0 +static inline void fill_mb_avail(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + + if(s->mb_y){ + h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num; + h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num; + h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num; + }else{ + h->mb_avail[0]= + h->mb_avail[1]= + h->mb_avail[2]= 0; + } + h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num; + h->mb_avail[4]= 1; //FIXME move out + h->mb_avail[5]= 0; //FIXME move out +} +#endif + +#if 0 //selftest +#define COUNT 8000 +#define SIZE (COUNT*40) +int main(){ + int i; + uint8_t temp[SIZE]; + PutBitContext pb; + GetBitContext gb; +// int int_temp[10000]; + DSPContext dsp; + AVCodecContext avctx; + + dsputil_init(&dsp, &avctx); + + init_put_bits(&pb, temp, SIZE); + printf("testing unsigned exp golomb\n"); + for(i=0; idsp.h264_idct_add(ref, block, 4); +/* for(j=0; j<16; j++){ + printf("%d ", ref[j]); + } + printf("\n");*/ + + for(j=0; j<16; j++){ + int diff= ABS(src[j] - ref[j]); + + error+= diff*diff; + max_error= FFMAX(max_error, diff); + } + } + printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, 
(int)max_error ); +#if 0 + printf("testing quantizer\n"); + for(qp=0; qp<52; qp++){ + for(i=0; i<16; i++) + src1_block[i]= src2_block[i]= random()%255; + + } +#endif + printf("Testing NAL layer\n"); + + uint8_t bitstream[COUNT]; + uint8_t nal[COUNT*2]; + H264Context h; + memset(&h, 0, sizeof(H264Context)); + + for(i=0; ipriv_data; + MpegEncContext *s = &h->s; + + av_freep(&h->rbsp_buffer); + free_tables(h); //FIXME cleanup init stuff perhaps + MPV_common_end(s); + +// memset(h, 0, sizeof(H264Context)); + + return 0; +} + + +AVCodec h264_decoder = { + "h264", + CODEC_TYPE_VIDEO, + CODEC_ID_H264, + sizeof(H264Context), + decode_init, + NULL, + decode_end, + decode_frame, + /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, + .flush= flush_dpb, +}; + +AVCodecParser h264_parser = { + { CODEC_ID_H264 }, + sizeof(H264Context), + NULL, + h264_parse, + ff_parse_close, + h264_split, +}; + +#include "svq3.c" diff --git a/mpeg4/src/libavcodec/h264data.h b/mpeg4/src/libavcodec/h264data.h new file mode 100644 index 0000000000000000000000000000000000000000..3132102df4ae77c815ec1c33227fefce97129bc7 --- /dev/null +++ b/mpeg4/src/libavcodec/h264data.h @@ -0,0 +1,1240 @@ +/* + * H26L/H264/AVC/JVT/14496-10/... encoder/decoder + * Copyright (c) 2003 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file h264data.h + * @brief + * H264 / AVC / MPEG4 part10 codec data table + * @author Michael Niedermayer + */ + +#define VERT_PRED 0 +#define HOR_PRED 1 +#define DC_PRED 2 +#define DIAG_DOWN_LEFT_PRED 3 +#define DIAG_DOWN_RIGHT_PRED 4 +#define VERT_RIGHT_PRED 5 +#define HOR_DOWN_PRED 6 +#define VERT_LEFT_PRED 7 +#define HOR_UP_PRED 8 + +#define LEFT_DC_PRED 9 +#define TOP_DC_PRED 10 +#define DC_128_PRED 11 + + +#define DC_PRED8x8 0 +#define HOR_PRED8x8 1 +#define VERT_PRED8x8 2 +#define PLANE_PRED8x8 3 + +#define LEFT_DC_PRED8x8 4 +#define TOP_DC_PRED8x8 5 +#define DC_128_PRED8x8 6 + +#define EXTENDED_SAR 255 + +static const AVRational pixel_aspect[14]={ + {0, 1}, + {1, 1}, + {12, 11}, + {10, 11}, + {16, 11}, + {40, 33}, + {24, 11}, + {20, 11}, + {32, 11}, + {80, 33}, + {18, 11}, + {15, 11}, + {64, 33}, + {160,99}, +}; + +static const uint8_t golomb_to_pict_type[5]= +{P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; + +static const uint8_t pict_type_to_golomb[7]= +{-1, 2, 0, 1, -1, 4, 3}; + +static const uint8_t chroma_qp[52]={ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, + 12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, + 28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37, + 37,38,38,38,39,39,39,39 + +}; + +static const uint8_t golomb_to_intra4x4_cbp[48]={ + 47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46, + 16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4, + 8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41 +}; + +static const uint8_t golomb_to_inter_cbp[48]={ + 0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13, + 14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46, + 17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41 +}; + +static const uint8_t intra4x4_cbp_to_golomb[48]={ + 
3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2, + 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1, + 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0 +}; + +static const uint8_t inter_cbp_to_golomb[48]={ + 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11, + 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19, + 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12 +}; + +static const uint8_t chroma_dc_coeff_token_len[4*5]={ + 2, 0, 0, 0, + 6, 1, 0, 0, + 6, 6, 3, 0, + 6, 7, 7, 6, + 6, 8, 8, 7, +}; + +static const uint8_t chroma_dc_coeff_token_bits[4*5]={ + 1, 0, 0, 0, + 7, 1, 0, 0, + 4, 6, 1, 0, + 3, 3, 2, 5, + 2, 3, 2, 0, +}; + +static const uint8_t coeff_token_len[4][4*17]={ +{ + 1, 0, 0, 0, + 6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6, + 11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10, + 14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14, + 16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16, +}, +{ + 2, 0, 0, 0, + 6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4, + 8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7, + 12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12, + 13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14, +}, +{ + 4, 0, 0, 0, + 6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4, + 7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5, + 8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8, + 10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10, +}, +{ + 6, 0, 0, 0, + 6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +} +}; + +static const uint8_t coeff_token_bits[4][4*17]={ +{ + 1, 0, 0, 0, + 5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3, + 7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4, + 15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8, + 15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8, +}, +{ + 3, 0, 0, 0, + 11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 
6, 5, 4, + 4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4, + 15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12, + 11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4, +}, +{ + 15, 0, 0, 0, + 15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11, + 11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13, + 11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8, + 13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2, +}, +{ + 3, 0, 0, 0, + 0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15, + 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31, + 32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47, + 48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63, +} +}; + +static const uint8_t total_zeros_len[16][16]= { /* only 15 of the 16 declared rows are listed and row i carries 16-i values; all remaining elements are implicitly zero */ + {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9}, + {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6}, + {4,3,3,3,4,4,3,3,4,5,5,6,5,6}, + {5,3,4,4,3,3,3,4,3,4,5,5,5}, + {4,4,4,3,3,3,3,3,4,5,4,5}, + {6,5,3,3,3,3,3,3,4,3,6}, + {6,5,3,3,3,2,3,4,3,6}, + {6,4,5,3,2,2,3,3,6}, + {6,6,4,2,2,3,2,5}, + {5,5,3,2,2,2,4}, + {4,4,3,3,1,3}, + {4,4,2,1,3}, + {3,3,1,2}, + {2,2,1}, + {1,1}, +}; + +static const uint8_t total_zeros_bits[16][16]= { /* same triangular layout as total_zeros_len: 15 rows, row i has 16-i values, the rest is implicitly zero */ + {1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1}, + {7,6,5,4,3,5,4,3,2,3,2,3,2,1,0}, + {5,7,6,5,4,3,4,3,2,3,2,1,1,0}, + {3,7,5,4,6,5,4,3,3,2,2,1,0}, + {5,4,3,7,6,5,4,3,2,1,1,0}, + {1,1,7,6,5,4,3,2,1,1,0}, + {1,1,5,4,3,3,2,1,1,0}, + {1,1,1,3,3,2,2,1,0}, + {1,0,1,3,2,1,1,1}, + {1,0,1,3,2,1,1}, + {0,1,1,2,1,3}, + {0,1,1,1,1}, + {0,1,1,1}, + {0,1,1}, + {0,1}, +}; + +static const uint8_t chroma_dc_total_zeros_len[3][4]= { + { 1, 2, 3, 3,}, + { 1, 2, 2, 0,}, + { 1, 1, 0, 0,}, +}; + +static const uint8_t chroma_dc_total_zeros_bits[3][4]= { + { 1, 1, 1, 0,}, + { 1, 1, 0, 0,}, + { 1, 0, 0, 0,}, +}; + +static const uint8_t run_len[7][16]={ /* the rows list 2,3,4,5,6,7 and 15 values; the unlisted tail of each 16-entry row is implicitly zero */ + {1,1}, + {1,2,2}, + {2,2,2,2}, + {2,2,2,3,3}, + {2,2,3,3,3,3}, + {2,3,3,3,3,3,3}, + {3,3,3,3,3,3,3,4,5,6,7,8,9,10,11}, +}; + +static const uint8_t run_bits[7][16]={ /* row lengths match run_len */ + {1,0}, + {1,1,0}, + {3,2,1,0}, + {3,2,1,1,0}, + {3,2,3,2,1,0}, + {3,0,1,3,2,5,4}, + {7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
+}; + +/* +o-o o-o + / / / +o-o o-o + ,---' +o-o o-o + / / / +o-o o-o +*/ + +static const uint8_t scan8[16 + 2*4]={ + 4+1*8, 5+1*8, 4+2*8, 5+2*8, + 6+1*8, 7+1*8, 6+2*8, 7+2*8, + 4+3*8, 5+3*8, 4+4*8, 5+4*8, + 6+3*8, 7+3*8, 6+4*8, 7+4*8, + 1+1*8, 2+1*8, + 1+2*8, 2+2*8, + 1+4*8, 2+4*8, + 1+5*8, 2+5*8, +}; + +static const uint8_t zigzag_scan[16]={ + 0+0*4, 1+0*4, 0+1*4, 0+2*4, + 1+1*4, 2+0*4, 3+0*4, 2+1*4, + 1+2*4, 0+3*4, 1+3*4, 2+2*4, + 3+1*4, 3+2*4, 2+3*4, 3+3*4, +}; + +static const uint8_t field_scan[16]={ + 0+0*4, 0+1*4, 1+0*4, 0+2*4, + 0+3*4, 1+1*4, 1+2*4, 1+3*4, + 2+0*4, 2+1*4, 2+2*4, 2+3*4, + 3+0*4, 3+1*4, 3+2*4, 3+3*4, +}; + +static const uint8_t luma_dc_zigzag_scan[16]={ + 0*16 + 0*64, 1*16 + 0*64, 2*16 + 0*64, 0*16 + 2*64, + 3*16 + 0*64, 0*16 + 1*64, 1*16 + 1*64, 2*16 + 1*64, + 1*16 + 2*64, 2*16 + 2*64, 3*16 + 2*64, 0*16 + 3*64, + 3*16 + 1*64, 1*16 + 3*64, 2*16 + 3*64, 3*16 + 3*64, +}; + +static const uint8_t luma_dc_field_scan[16]={ + 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, + 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, + 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, + 1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64, +}; + +static const uint8_t chroma_dc_scan[4]={ + (0+0*2)*16, (1+0*2)*16, + (0+1*2)*16, (1+1*2)*16, //FIXME +}; + +static const uint8_t zigzag_scan8x8[64]={ + 0+0*8, 1+0*8, 0+1*8, 0+2*8, + 1+1*8, 2+0*8, 3+0*8, 2+1*8, + 1+2*8, 0+3*8, 0+4*8, 1+3*8, + 2+2*8, 3+1*8, 4+0*8, 5+0*8, + 4+1*8, 3+2*8, 2+3*8, 1+4*8, + 0+5*8, 0+6*8, 1+5*8, 2+4*8, + 3+3*8, 4+2*8, 5+1*8, 6+0*8, + 7+0*8, 6+1*8, 5+2*8, 4+3*8, + 3+4*8, 2+5*8, 1+6*8, 0+7*8, + 1+7*8, 2+6*8, 3+5*8, 4+4*8, + 5+3*8, 6+2*8, 7+1*8, 7+2*8, + 6+3*8, 5+4*8, 4+5*8, 3+6*8, + 2+7*8, 3+7*8, 4+6*8, 5+5*8, + 6+4*8, 7+3*8, 7+4*8, 6+5*8, + 5+6*8, 4+7*8, 5+7*8, 6+6*8, + 7+5*8, 7+6*8, 6+7*8, 7+7*8, +}; + +// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[4*(i%16) + i/16], i.e. zigzag_scan8x8[i] = zigzag_scan8x8_cavlc[(i/4) + 16*(i%4)] +static const uint8_t zigzag_scan8x8_cavlc[64]={ + 0+0*8, 1+1*8, 1+2*8, 2+2*8, + 4+1*8, 0+5*8, 3+3*8, 7+0*8, + 3+4*8,
1+7*8, 5+3*8, 6+3*8, + 2+7*8, 6+4*8, 5+6*8, 7+5*8, + 1+0*8, 2+0*8, 0+3*8, 3+1*8, + 3+2*8, 0+6*8, 4+2*8, 6+1*8, + 2+5*8, 2+6*8, 6+2*8, 5+4*8, + 3+7*8, 7+3*8, 4+7*8, 7+6*8, + 0+1*8, 3+0*8, 0+4*8, 4+0*8, + 2+3*8, 1+5*8, 5+1*8, 5+2*8, + 1+6*8, 3+5*8, 7+1*8, 4+5*8, + 4+6*8, 7+4*8, 5+7*8, 6+7*8, + 0+2*8, 2+1*8, 1+3*8, 5+0*8, + 1+4*8, 2+4*8, 6+0*8, 4+3*8, + 0+7*8, 4+4*8, 7+2*8, 3+6*8, + 5+5*8, 6+5*8, 6+6*8, 7+7*8, +}; + +#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16bit +#define MB_TYPE_8x8DCT 0x01000000 /* needs 25 bits, i.e. wider than the uint16_t type members of IMbInfo and PMbInfo */ +#define IS_REF0(a) ((a)&MB_TYPE_REF0) +#define IS_8x8DCT(a) ((a)&MB_TYPE_8x8DCT) + + +typedef struct IMbInfo{ + uint16_t type; + uint8_t pred_mode; /* unsigned: a -1 initialiser is stored as 255 */ + uint8_t cbp; /* unsigned: a -1 initialiser is stored as 255 */ +} IMbInfo; + +static const IMbInfo i_mb_type_info[26]={ /* first and last entries use -1 for pred_mode and cbp; entries 1..24 cycle pred_mode through 2,1,0,3 while cbp advances 0,16,32,15,31,47 every four entries */ +{MB_TYPE_INTRA4x4 , -1, -1}, +{MB_TYPE_INTRA16x16, 2, 0}, +{MB_TYPE_INTRA16x16, 1, 0}, +{MB_TYPE_INTRA16x16, 0, 0}, +{MB_TYPE_INTRA16x16, 3, 0}, +{MB_TYPE_INTRA16x16, 2, 16}, +{MB_TYPE_INTRA16x16, 1, 16}, +{MB_TYPE_INTRA16x16, 0, 16}, +{MB_TYPE_INTRA16x16, 3, 16}, +{MB_TYPE_INTRA16x16, 2, 32}, +{MB_TYPE_INTRA16x16, 1, 32}, +{MB_TYPE_INTRA16x16, 0, 32}, +{MB_TYPE_INTRA16x16, 3, 32}, +{MB_TYPE_INTRA16x16, 2, 15+0}, +{MB_TYPE_INTRA16x16, 1, 15+0}, +{MB_TYPE_INTRA16x16, 0, 15+0}, +{MB_TYPE_INTRA16x16, 3, 15+0}, +{MB_TYPE_INTRA16x16, 2, 15+16}, +{MB_TYPE_INTRA16x16, 1, 15+16}, +{MB_TYPE_INTRA16x16, 0, 15+16}, +{MB_TYPE_INTRA16x16, 3, 15+16}, +{MB_TYPE_INTRA16x16, 2, 15+32}, +{MB_TYPE_INTRA16x16, 1, 15+32}, +{MB_TYPE_INTRA16x16, 0, 15+32}, +{MB_TYPE_INTRA16x16, 3, 15+32}, +{MB_TYPE_INTRA_PCM , -1, -1}, +}; + +typedef struct PMbInfo{ + uint16_t type; + uint8_t partition_count; +} PMbInfo; + +static const PMbInfo p_mb_type_info[5]={ +{MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, +{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, +{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, +}; + +static const PMbInfo p_sub_mb_type_info[4]={ +{MB_TYPE_16x16|MB_TYPE_P0L0 , 1},
+{MB_TYPE_16x8 |MB_TYPE_P0L0 , 2}, +{MB_TYPE_8x16 |MB_TYPE_P0L0 , 2}, +{MB_TYPE_8x8 |MB_TYPE_P0L0 , 4}, +}; + +static const PMbInfo b_mb_type_info[23]={ +{MB_TYPE_DIRECT2 , 1, }, +{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, +{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, +{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0 , 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +}; + +static const PMbInfo b_sub_mb_type_info[13]={ +{MB_TYPE_DIRECT2 , 1, }, +{MB_TYPE_16x16|MB_TYPE_P0L0 , 1, }, +{MB_TYPE_16x16 |MB_TYPE_P0L1 , 1, }, +{MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1 , 1, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, +{MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, +{MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, 
+{MB_TYPE_8x8 |MB_TYPE_P0L0 |MB_TYPE_P1L0 , 4, }, +{MB_TYPE_8x8 |MB_TYPE_P0L1 |MB_TYPE_P1L1, 4, }, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, +}; + + +static const uint8_t rem6[52]={ /* rem6[i] == i % 6 for i in 0..51 */ +0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, +}; + +static const uint8_t div6[52]={ /* div6[i] == i / 6 for i in 0..51 */ +0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, +}; + +static const uint8_t default_scaling4[2][16]={ +{ 6,13,20,28, + 13,20,28,32, + 20,28,32,37, + 28,32,37,42 +},{ + 10,14,20,24, + 14,20,24,27, + 20,24,27,30, + 24,27,30,34 +}}; + +static const uint8_t default_scaling8[2][64]={ +{ 6,10,13,16,18,23,25,27, + 10,11,16,18,23,25,27,29, + 13,16,18,23,25,27,29,31, + 16,18,23,25,27,29,31,33, + 18,23,25,27,29,31,33,36, + 23,25,27,29,31,33,36,38, + 25,27,29,31,33,36,38,40, + 27,29,31,33,36,38,40,42 +},{ + 9,13,15,17,19,21,22,24, + 13,13,17,19,21,22,24,25, + 15,17,19,21,22,24,25,27, + 17,19,21,22,24,25,27,28, + 19,21,22,24,25,27,28,30, + 21,22,24,25,27,28,30,32, + 22,24,25,27,28,30,32,33, + 24,25,27,28,30,32,33,35 +}}; + +static const int dequant4_coeff_init[6][3]={ + {10,13,16}, + {11,14,18}, + {13,16,20}, + {14,18,23}, + {16,20,25}, + {18,23,29}, +}; + +static const int dequant8_coeff_init_scan[16] = { /* all 16 values lie in 0..5; NOTE(review): presumably column indices into dequant8_coeff_init[6][6] - confirm at the use site */ + 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 +}; +static const int dequant8_coeff_init[6][6]={ + {20,18,32,19,25,24}, + {22,19,35,21,28,26}, + {26,23,42,24,33,31}, + {28,25,45,26,35,33}, + {32,28,51,30,40,38}, + {36,32,58,34,46,43}, +}; + +#define QUANT_SHIFT 22 + +static const int quant_coeff[52][16]={ /* NOTE(review): entry [0][0] = 419430 equals (1 << 22) / 10 rounded down, which suggests values scaled by 1 << QUANT_SHIFT - confirm */ + { 419430,258111,419430,258111,258111,167772,258111,167772,419430,258111,419430,258111,258111,167772,258111,167772,}, + { 381300,239675,381300,239675,239675,149131,239675,149131,381300,239675,381300,239675,239675,149131,239675,149131,}, + {
322639,209715,322639,209715,209715,134218,209715,134218,322639,209715,322639,209715,209715,134218,209715,134218,}, + { 299593,186414,299593,186414,186414,116711,186414,116711,299593,186414,299593,186414,186414,116711,186414,116711,}, + { 262144,167772,262144,167772,167772,107374,167772,107374,262144,167772,262144,167772,167772,107374,167772,107374,}, + { 233017,145889,233017,145889,145889, 92564,145889, 92564,233017,145889,233017,145889,145889, 92564,145889, 92564,}, + { 209715,129056,209715,129056,129056, 83886,129056, 83886,209715,129056,209715,129056,129056, 83886,129056, 83886,}, + { 190650,119837,190650,119837,119837, 74565,119837, 74565,190650,119837,190650,119837,119837, 74565,119837, 74565,}, + { 161319,104858,161319,104858,104858, 67109,104858, 67109,161319,104858,161319,104858,104858, 67109,104858, 67109,}, + { 149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,}, + { 131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,}, + { 116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,}, + { 104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,}, + { 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283, 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283,}, + { 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554, 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554,}, + { 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178, 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178,}, + { 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844, 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844,}, + { 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141, 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141,}, + { 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972, 52429, 32264, 52429, 32264, 32264, 20972, 
32264, 20972,}, + { 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641, 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641,}, + { 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777, 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777,}, + { 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589, 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589,}, + { 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422, 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422,}, + { 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570, 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570,}, + { 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486, 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486,}, + { 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321, 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321,}, + { 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389, 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389,}, + { 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294, 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294,}, + { 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711, 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711,}, + { 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785, 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785,}, + { 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243, 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243,}, + { 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660, 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660,}, + { 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194, 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194,}, + { 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647, 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647,}, + { 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355, 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355,}, + { 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893, 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893,}, + { 6554, 4033, 6554, 4033, 4033, 2621, 4033, 2621, 6554, 4033, 6554, 4033, 4033, 2621, 
4033, 2621,}, + { 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330, 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330,}, + { 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097, 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097,}, + { 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824, 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824,}, + { 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678, 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678,}, + { 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446, 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446,}, + { 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311, 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311,}, + { 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165, 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165,}, + { 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049, 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049,}, + { 2341, 1456, 2341, 1456, 1456, 912, 1456, 912, 2341, 1456, 2341, 1456, 1456, 912, 1456, 912,}, + { 2048, 1311, 2048, 1311, 1311, 839, 1311, 839, 2048, 1311, 2048, 1311, 1311, 839, 1311, 839,}, + { 1820, 1140, 1820, 1140, 1140, 723, 1140, 723, 1820, 1140, 1820, 1140, 1140, 723, 1140, 723,}, + { 1638, 1008, 1638, 1008, 1008, 655, 1008, 655, 1638, 1008, 1638, 1008, 1008, 655, 1008, 655,}, + { 1489, 936, 1489, 936, 936, 583, 936, 583, 1489, 936, 1489, 936, 936, 583, 936, 583,}, + { 1260, 819, 1260, 819, 819, 524, 819, 524, 1260, 819, 1260, 819, 819, 524, 819, 524,}, + { 1170, 728, 1170, 728, 728, 456, 728, 456, 1170, 728, 1170, 728, 728, 456, 728, 456,}, +}; + + +/* Deblocking filter (p153) */ +static const int alpha_table[52] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 4, 5, 6, + 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, + 25, 28, 32, 36, 40, 45, 50, 56, 63, 71, + 80, 90,101,113,127,144,162,182,203,226, + 255, 255 +}; +static const int beta_table[52] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 2, 2, 3, + 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, + 18, 18 +}; 
+static const int tc0_table[52][3] = { + { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, + { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, + { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 }, + { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 }, + { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 }, + { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 }, + { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 }, + { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 }, + { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 } +}; + +/* CABAC pre-state tables */ + +static const int cabac_context_init_I[460][2] = +{ + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28,127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 unused for I */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, + + /* 24 - 39 (all zero in this table) */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + + /* 40 - 53 (all zero in this table) */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, + + /* 54 - 59 (all zero in this table) */ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, + { 0, 0 }, { 0, 0 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 87 */ + { 0, 11 }, { 1, 55 }, { 0, 69 }, { -17, 127 }, + { -13, 102 },{ 0, 82 }, { -7, 74 }, { -21, 107 }, + { -27, 127 },{ -31, 127 },{ -24, 127 }, { -18, 95 }, + { -27, 127 },{ -21, 114 },{ -30, 127 }, { -17, 123 }, + { -12, 115 },{ -16, 122 }, + + /* 88 - 104 */ + {
-11, 115 },{ -12, 63 }, { -2, 68 }, { -15, 84 }, + { -13, 104 },{ -3, 70 }, { -8, 93 }, { -10, 90 }, + { -30, 127 },{ -1, 74 }, { -6, 97 }, { -7, 91 }, + { -20, 127 },{ -4, 56 }, { -5, 82 }, { -7, 76 }, + { -22, 125 }, + + /* 105 -> 135 */ + { -7, 93 }, { -11, 87 }, { -3, 77 }, { -5, 71 }, + { -4, 63 }, { -4, 68 }, { -12, 84 }, { -7, 62 }, + { -7, 65 }, { 8, 61 }, { 5, 56 }, { -2, 66 }, + { 1, 64 }, { 0, 61 }, { -2, 78 }, { 1, 50 }, + { 7, 52 }, { 10, 35 }, { 0, 44 }, { 11, 38 }, + { 1, 45 }, { 0, 46 }, { 5, 44 }, { 31, 17 }, + { 1, 51 }, { 7, 50 }, { 28, 19 }, { 16, 33 }, + { 14, 62 }, { -13, 108 },{ -15, 100 }, + + /* 136 -> 165 */ + { -13, 101 },{ -13, 91 }, { -12, 94 }, { -10, 88 }, + { -16, 84 }, { -10, 86 }, { -7, 83 }, { -13, 87 }, + { -19, 94 }, { 1, 70 }, { 0, 72 }, { -5, 74 }, + { 18, 59 }, { -8, 102 }, { -15, 100 }, { 0, 95 }, + { -4, 75 }, { 2, 72 }, { -11, 75 }, { -3, 71 }, + { 15, 46 }, { -13, 69 }, { 0, 62 }, { 0, 65 }, + { 21, 37 }, { -15, 72 }, { 9, 57 }, { 16, 54 }, + { 0, 62 }, { 12, 72 }, + + /* 166 -> 196 */ + { 24, 0 }, { 15, 9 }, { 8, 25 }, { 13, 18 }, + { 15, 9 }, { 13, 19 }, { 10, 37 }, { 12, 18 }, + { 6, 29 }, { 20, 33 }, { 15, 30 }, { 4, 45 }, + { 1, 58 }, { 0, 62 }, { 7, 61 }, { 12, 38 }, + { 11, 45 }, { 15, 39 }, { 11, 42 }, { 13, 44 }, + { 16, 45 }, { 12, 41 }, { 10, 49 }, { 30, 34 }, + { 18, 42 }, { 10, 55 }, { 17, 51 }, { 17, 46 }, + { 0, 89 }, { 26, -19 }, { 22, -17 }, + + /* 197 -> 226 */ + { 26, -17 }, { 30, -25 }, { 28, -20 }, { 33, -23 }, + { 37, -27 }, { 33, -23 }, { 40, -28 }, { 38, -17 }, + { 33, -11 }, { 40, -15 }, { 41, -6 }, { 38, 1 }, + { 41, 17 }, { 30, -6 }, { 27, 3 }, { 26, 22 }, + { 37, -16 }, { 35, -4 }, { 38, -8 }, { 38, -3 }, + { 37, 3 }, { 38, 5 }, { 42, 0 }, { 35, 16 }, + { 39, 22 }, { 14, 48 }, { 27, 37 }, { 21, 60 }, + { 12, 68 }, { 2, 97 }, + + /* 227 -> 251 */ + { -3, 71 }, { -6, 42 }, { -5, 50 }, { -3, 54 }, + { -2, 62 }, { 0, 58 }, { 1, 63 }, { -2, 72 }, + { -1, 74 }, { -9, 91 }, { -5, 67 }, { -5, 27 }, + { 
-3, 39 }, { -2, 44 }, { 0, 46 }, { -16, 64 }, + { -8, 68 }, { -10, 78 }, { -6, 77 }, { -10, 86 }, + { -12, 92 }, { -15, 55 }, { -10, 60 }, { -6, 62 }, + { -4, 65 }, + + /* 252 -> 275 */ + { -12, 73 }, { -8, 76 }, { -7, 80 }, { -9, 88 }, + { -17, 110 },{ -11, 97 }, { -20, 84 }, { -11, 79 }, + { -6, 73 }, { -4, 74 }, { -13, 86 }, { -13, 96 }, + { -11, 97 }, { -19, 117 },{ -8, 78 }, { -5, 33 }, + { -4, 48 }, { -2, 53 }, { -3, 62 }, { -13, 71 }, + { -10, 79 }, { -12, 86 }, { -13, 90 }, { -14, 97 }, + + /* 276 a bit special (not used, bypass is used instead) */ + { 0, 0 }, + + /* 277 -> 307 */ + { -6, 93 }, { -6, 84 }, { -8, 79 }, { 0, 66 }, + { -1, 71 }, { 0, 62 }, { -2, 60 }, { -2, 59 }, + { -5, 75 }, { -3, 62 }, { -4, 58 }, { -9, 66 }, + { -1, 79 }, { 0, 71 }, { 3, 68 }, { 10, 44 }, + { -7, 62 }, { 15, 36 }, { 14, 40 }, { 16, 27 }, + { 12, 29 }, { 1, 44 }, { 20, 36 }, { 18, 32 }, + { 5, 42 }, { 1, 48 }, { 10, 62 }, { 17, 46 }, + { 9, 64 }, { -12, 104 },{ -11, 97 }, + + /* 308 -> 337 */ + { -16, 96 }, { -7, 88 }, { -8, 85 }, { -7, 85 }, + { -9, 85 }, { -13, 88 }, { 4, 66 }, { -3, 77 }, + { -3, 76 }, { -6, 76 }, { 10, 58 }, { -1, 76 }, + { -1, 83 }, { -7, 99 }, { -14, 95 }, { 2, 95 }, + { 0, 76 }, { -5, 74 }, { 0, 70 }, { -11, 75 }, + { 1, 68 }, { 0, 65 }, { -14, 73 }, { 3, 62 }, + { 4, 62 }, { -1, 68 }, { -13, 75 }, { 11, 55 }, + { 5, 64 }, { 12, 70 }, + + /* 338 -> 368 */ + { 15, 6 }, { 6, 19 }, { 7, 16 }, { 12, 14 }, + { 18, 13 }, { 13, 11 }, { 13, 15 }, { 15, 16 }, + { 12, 23 }, { 13, 23 }, { 15, 20 }, { 14, 26 }, + { 14, 44 }, { 17, 40 }, { 17, 47 }, { 24, 17 }, + { 21, 21 }, { 25, 22 }, { 31, 27 }, { 22, 29 }, + { 19, 35 }, { 14, 50 }, { 10, 57 }, { 7, 63 }, + { -2, 77 }, { -4, 82 }, { -3, 94 }, { 9, 69 }, + { -12, 109 },{ 36, -35 }, { 36, -34 }, + + /* 369 -> 398 */ + { 32, -26 }, { 37, -30 }, { 44, -32 }, { 34, -18 }, + { 34, -15 }, { 40, -15 }, { 33, -7 }, { 35, -5 }, + { 33, 0 }, { 38, 2 }, { 33, 13 }, { 23, 35 }, + { 13, 58 }, { 29, -3 }, { 26, 0 }, { 22, 30 
}, + { 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 }, + { 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 }, + { 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 }, + { 29, 39 }, { 19, 66 }, + + /* 399 -> 435 */ + { 31, 21 }, { 31, 31 }, { 25, 50 }, + { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, + { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, + { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, + { -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 }, + { 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 }, + { 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 }, + { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, + { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, + { 0, 68 }, { -9, 92 }, + + /* 436 -> 459 */ + { -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 }, + { -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 }, + { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 }, + { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 }, + { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 } +}; + +static const int cabac_context_init_PB[3][460][2] = +{ + /* i_cabac_init_idc == 0 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 23, 33 }, { 23, 2 }, { 21, 0 }, { 1, 9 }, + { 0, 49 }, { -37, 118 }, { 5, 57 }, { -13, 78 }, + { -11, 65 }, { 1, 62 }, { 12, 49 }, { -4, 73 }, + { 17, 50 }, + + /* 24 - 39 */ + { 18, 64 }, { 9, 43 }, { 29, 0 }, { 26, 67 }, + { 16, 90 }, { 9, 104 }, { -46, 127 }, { -20, 104 }, + { 1, 67 }, { -13, 78 }, { -11, 65 }, { 1, 62 }, + { -6, 86 }, { -17, 95 }, { -6, 61 }, { 9, 45 }, + + /* 40 - 53 */ + { -3, 69 }, { -6, 81 }, { -11, 96 }, { 6, 55 }, + { 7, 67 }, { -5, 86 }, { 2, 88 }, { 0, 58 }, + { -3, 76 }, { -10, 94 }, { 5, 54 }, { 4, 69 }, + { -3, 81 }, { 0, 88 }, + + /* 54 - 59 */ + { -7, 67 }, { -5, 74 }, { -4, 74 }, { -5, 80 }, + { -7, 72 }, { 1, 58 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, 
+ { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 87 */ + { 0, 45 }, { -4, 78 }, { -3, 96 }, { -27, 126 }, + { -28, 98 }, { -25, 101 }, { -23, 67 }, { -28, 82 }, + { -20, 94 }, { -16, 83 }, { -22, 110 }, { -21, 91 }, + { -18, 102 }, { -13, 93 }, { -29, 127 }, { -7, 92 }, + { -5, 89 }, { -7, 96 }, { -13, 108 }, { -3, 46 }, + { -1, 65 }, { -1, 57 }, { -9, 93 }, { -3, 74 }, + { -9, 92 }, { -8, 87 }, { -23, 126 }, { 5, 54 }, + { 6, 60 }, { 6, 59 }, { 6, 69 }, { -1, 48 }, + { 0, 68 }, { -4, 69 }, { -8, 88 }, + + /* 105 -> 165 */ + { -2, 85 }, { -6, 78 }, { -1, 75 }, { -7, 77 }, + { 2, 54 }, { 5, 50 }, { -3, 68 }, { 1, 50 }, + { 6, 42 }, { -4, 81 }, { 1, 63 }, { -4, 70 }, + { 0, 67 }, { 2, 57 }, { -2, 76 }, { 11, 35 }, + { 4, 64 }, { 1, 61 }, { 11, 35 }, { 18, 25 }, + { 12, 24 }, { 13, 29 }, { 13, 36 }, { -10, 93 }, + { -7, 73 }, { -2, 73 }, { 13, 46 }, { 9, 49 }, + { -7, 100 }, { 9, 53 }, { 2, 53 }, { 5, 53 }, + { -2, 61 }, { 0, 56 }, { 0, 56 }, { -13, 63 }, + { -5, 60 }, { -1, 62 }, { 4, 57 }, { -6, 69 }, + { 4, 57 }, { 14, 39 }, { 4, 51 }, { 13, 68 }, + { 3, 64 }, { 1, 61 }, { 9, 63 }, { 7, 50 }, + { 16, 39 }, { 5, 44 }, { 4, 52 }, { 11, 48 }, + { -5, 60 }, { -1, 59 }, { 0, 59 }, { 22, 33 }, + { 5, 44 }, { 14, 43 }, { -1, 78 }, { 0, 60 }, + { 9, 69 }, + + /* 166 - 226 */ + { 11, 28 }, { 2, 40 }, { 3, 44 }, { 0, 49 }, + { 0, 46 }, { 2, 44 }, { 2, 51 }, { 0, 47 }, + { 4, 39 }, { 2, 62 }, { 6, 46 }, { 0, 54 }, + { 3, 54 }, { 2, 58 }, { 4, 63 }, { 6, 51 }, + { 6, 57 }, { 7, 53 }, { 6, 52 }, { 6, 55 }, + { 11, 45 }, { 14, 36 }, { 8, 53 }, { -1, 82 }, + { 7, 55 }, { -3, 78 }, { 15, 46 }, { 22, 31 }, + { -1, 84 }, { 25, 7 }, { 30, -7 }, { 28, 3 }, + { 28, 4 }, { 32, 0 }, { 34, -1 }, { 30, 6 }, + { 30, 6 }, { 32, 9 }, { 31, 19 }, { 26, 27 }, + { 26, 30 }, { 37, 20 }, { 28, 34 }, { 17, 70 }, + { 1, 67 }, { 5, 59 }, { 9, 67 }, { 16, 30 }, + { 18, 32 }, { 18, 35 }, { 22, 29 }, { 24, 31 }, + { 23, 38 }, { 18, 43 }, { 20, 41 }, { 11, 63 }, + { 9, 
59 }, { 9, 64 }, { -1, 94 }, { -2, 89 }, + { -9, 108 }, + + /* 227 - 275 */ + { -6, 76 }, { -2, 44 }, { 0, 45 }, { 0, 52 }, + { -3, 64 }, { -2, 59 }, { -4, 70 }, { -4, 75 }, + { -8, 82 }, { -17, 102 }, { -9, 77 }, { 3, 24 }, + { 0, 42 }, { 0, 48 }, { 0, 55 }, { -6, 59 }, + { -7, 71 }, { -12, 83 }, { -11, 87 }, { -30, 119 }, + { 1, 58 }, { -3, 29 }, { -1, 36 }, { 1, 38 }, + { 2, 43 }, { -6, 55 }, { 0, 58 }, { 0, 64 }, + { -3, 74 }, { -10, 90 }, { 0, 70 }, { -4, 29 }, + { 5, 31 }, { 7, 42 }, { 1, 59 }, { -2, 58 }, + { -3, 72 }, { -3, 81 }, { -11, 97 }, { 0, 58 }, + { 8, 5 }, { 10, 14 }, { 14, 18 }, { 13, 27 }, + { 2, 40 }, { 0, 58 }, { -3, 70 }, { -6, 79 }, + { -8, 85 }, + + /* 276 a bit special (not used, bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -13, 106 }, { -16, 106 }, { -10, 87 }, { -21, 114 }, + { -18, 110 }, { -14, 98 }, { -22, 110 }, { -21, 106 }, + { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 }, + { -10, 96 }, { -12, 95 }, { -5, 91 }, { -9, 93 }, + { -22, 94 }, { -5, 86 }, { 9, 67 }, { -4, 80 }, + { -10, 85 }, { -1, 70 }, { 7, 60 }, { 9, 58 }, + { 5, 61 }, { 12, 50 }, { 15, 50 }, { 18, 49 }, + { 17, 54 }, { 10, 41 }, { 7, 46 }, { -1, 51 }, + { 7, 49 }, { 8, 52 }, { 9, 41 }, { 6, 47 }, + { 2, 55 }, { 13, 41 }, { 10, 44 }, { 6, 50 }, + { 5, 53 }, { 13, 49 }, { 4, 63 }, { 6, 64 }, + { -2, 69 }, { -2, 59 }, { 6, 70 }, { 10, 44 }, + { 9, 31 }, { 12, 43 }, { 3, 53 }, { 14, 34 }, + { 10, 38 }, { -3, 52 }, { 13, 40 }, { 17, 32 }, + { 7, 44 }, { 7, 38 }, { 13, 50 }, { 10, 57 }, + { 26, 43 }, + + /* 338 - 398 */ + { 14, 11 }, { 11, 14 }, { 9, 11 }, { 18, 11 }, + { 21, 9 }, { 23, -2 }, { 32, -15 }, { 32, -15 }, + { 34, -21 }, { 39, -23 }, { 42, -33 }, { 41, -31 }, + { 46, -28 }, { 38, -12 }, { 21, 29 }, { 45, -24 }, + { 53, -45 }, { 48, -26 }, { 65, -43 }, { 43, -19 }, + { 39, -10 }, { 30, 9 }, { 18, 26 }, { 20, 27 }, + { 0, 57 }, { -14, 82 }, { -5, 75 }, { -19, 97 }, + { -35, 125 }, { 27, 0 }, { 28, 0 }, { 31, -4 }, + { 27, 6 }, { 34, 8 }, 
{ 30, 10 }, { 24, 22 }, + { 33, 19 }, { 22, 32 }, { 26, 31 }, { 21, 41 }, + { 26, 44 }, { 23, 47 }, { 16, 65 }, { 14, 71 }, + { 8, 60 }, { 6, 63 }, { 17, 65 }, { 21, 24 }, + { 23, 20 }, { 26, 23 }, { 27, 32 }, { 28, 23 }, + { 28, 24 }, { 23, 40 }, { 24, 32 }, { 28, 29 }, + { 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 }, + { 11, 86 }, + + /* 399 - 435 */ + { 12, 40 }, { 11, 51 }, { 14, 59 }, + { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, + { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, + { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, + { -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 }, + { 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 }, + { 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 }, + { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, + { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, + { -8, 66 }, { -8, 76 }, + + /* 436 - 459 */ + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 }, + { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, + { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, + }, + + /* i_cabac_init_idc == 1 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 22, 25 }, { 34, 0 }, { 16, 0 }, { -2, 9 }, + { 4, 41 }, { -29, 118 }, { 2, 65 }, { -6, 71 }, + { -13, 79 }, { 5, 52 }, { 9, 50 }, { -3, 70 }, + { 10, 54 }, + + /* 24 - 39 */ + { 26, 34 }, { 19, 22 }, { 40, 0 }, { 57, 2 }, + { 41, 36 }, { 26, 69 }, { -45, 127 }, { -15, 101 }, + { -4, 76 }, { -6, 71 }, { -13, 79 }, { 5, 52 }, + { 6, 69 }, { -13, 90 }, { 0, 52 }, { 8, 43 }, + + /* 40 - 53 */ + { -2, 69 },{ -5, 82 },{ -10, 96 },{ 2, 59 }, + { 2, 75 },{ -3, 87 },{ -3, 100 },{ 1, 56 }, + { -3, 74 },{ -6, 85 },{ 0, 59 },{ -3, 81 }, + { -7, 86 },{ -5, 95 }, + + /* 54 - 59 */ + { -1, 66 },{ -1, 77 },{ 1, 70 },{ -2, 86 }, + { -5, 72 
},{ 0, 61 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 104 */ + { 13, 15 }, { 7, 51 }, { 2, 80 }, { -39, 127 }, + { -18, 91 }, { -17, 96 }, { -26, 81 }, { -35, 98 }, + { -24, 102 }, { -23, 97 }, { -27, 119 }, { -24, 99 }, + { -21, 110 }, { -18, 102 }, { -36, 127 }, { 0, 80 }, + { -5, 89 }, { -7, 94 }, { -4, 92 }, { 0, 39 }, + { 0, 65 }, { -15, 84 }, { -35, 127 }, { -2, 73 }, + { -12, 104 }, { -9, 91 }, { -31, 127 }, { 3, 55 }, + { 7, 56 }, { 7, 55 }, { 8, 61 }, { -3, 53 }, + { 0, 68 }, { -7, 74 }, { -9, 88 }, + + /* 105 -> 165 */ + { -13, 103 }, { -13, 91 }, { -9, 89 }, { -14, 92 }, + { -8, 76 }, { -12, 87 }, { -23, 110 }, { -24, 105 }, + { -10, 78 }, { -20, 112 }, { -17, 99 }, { -78, 127 }, + { -70, 127 }, { -50, 127 }, { -46, 127 }, { -4, 66 }, + { -5, 78 }, { -4, 71 }, { -8, 72 }, { 2, 59 }, + { -1, 55 }, { -7, 70 }, { -6, 75 }, { -8, 89 }, + { -34, 119 }, { -3, 75 }, { 32, 20 }, { 30, 22 }, + { -44, 127 }, { 0, 54 }, { -5, 61 }, { 0, 58 }, + { -1, 60 }, { -3, 61 }, { -8, 67 }, { -25, 84 }, + { -14, 74 }, { -5, 65 }, { 5, 52 }, { 2, 57 }, + { 0, 61 }, { -9, 69 }, { -11, 70 }, { 18, 55 }, + { -4, 71 }, { 0, 58 }, { 7, 61 }, { 9, 41 }, + { 18, 25 }, { 9, 32 }, { 5, 43 }, { 9, 47 }, + { 0, 44 }, { 0, 51 }, { 2, 46 }, { 19, 38 }, + { -4, 66 }, { 15, 38 }, { 12, 42 }, { 9, 34 }, + { 0, 89 }, + + /* 166 - 226 */ + { 4, 45 }, { 10, 28 }, { 10, 31 }, { 33, -11 }, + { 52, -43 }, { 18, 15 }, { 28, 0 }, { 35, -22 }, + { 38, -25 }, { 34, 0 }, { 39, -18 }, { 32, -12 }, + { 102, -94 }, { 0, 0 }, { 56, -15 }, { 33, -4 }, + { 29, 10 }, { 37, -5 }, { 51, -29 }, { 39, -9 }, + { 52, -34 }, { 69, -58 }, { 67, -63 }, { 44, -5 }, + { 32, 7 }, { 55, -29 }, { 32, 1 }, { 0, 0 }, + { 27, 36 }, { 33, -25 }, { 34, -30 }, { 36, -28 }, + { 38, -28 }, { 38, -27 }, { 34, -18 }, { 35, -16 }, + { 34, -14 }, { 32, -8 }, { 37, -6 }, { 35, 0 }, + { 30, 10 }, { 28, 18 }, { 26, 25 }, { 29, 41 }, 
+ { 0, 75 }, { 2, 72 }, { 8, 77 }, { 14, 35 }, + { 18, 31 }, { 17, 35 }, { 21, 30 }, { 17, 45 }, + { 20, 42 }, { 18, 45 }, { 27, 26 }, { 16, 54 }, + { 7, 66 }, { 16, 56 }, { 11, 73 }, { 10, 67 }, + { -10, 116 }, + + /* 227 - 275 */ + { -23, 112 }, { -15, 71 }, { -7, 61 }, { 0, 53 }, + { -5, 66 }, { -11, 77 }, { -9, 80 }, { -9, 84 }, + { -10, 87 }, { -34, 127 }, { -21, 101 }, { -3, 39 }, + { -5, 53 }, { -7, 61 }, { -11, 75 }, { -15, 77 }, + { -17, 91 }, { -25, 107 }, { -25, 111 }, { -28, 122 }, + { -11, 76 }, { -10, 44 }, { -10, 52 }, { -10, 57 }, + { -9, 58 }, { -16, 72 }, { -7, 69 }, { -4, 69 }, + { -5, 74 }, { -9, 86 }, { 2, 66 }, { -9, 34 }, + { 1, 32 }, { 11, 31 }, { 5, 52 }, { -2, 55 }, + { -2, 67 }, { 0, 73 }, { -8, 89 }, { 3, 52 }, + { 7, 4 }, { 10, 8 }, { 17, 8 }, { 16, 19 }, + { 3, 37 }, { -1, 61 }, { -5, 73 }, { -1, 70 }, + { -4, 78 }, + + /* 276 a bit special (not used, bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 }, + { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 }, + { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 }, + { -10, 95 }, { -14, 100 }, { -8, 95 }, { -17, 111 }, + { -28, 114 }, { -6, 89 }, { -2, 80 }, { -4, 82 }, + { -9, 85 }, { -8, 81 }, { -1, 72 }, { 5, 64 }, + { 1, 67 }, { 9, 56 }, { 0, 69 }, { 1, 69 }, + { 7, 69 }, { -7, 69 }, { -6, 67 }, { -16, 77 }, + { -2, 64 }, { 2, 61 }, { -6, 67 }, { -3, 64 }, + { 2, 57 }, { -3, 65 }, { -3, 66 }, { 0, 62 }, + { 9, 51 }, { -1, 66 }, { -2, 71 }, { -2, 75 }, + { -1, 70 }, { -9, 72 }, { 14, 60 }, { 16, 37 }, + { 0, 47 }, { 18, 35 }, { 11, 37 }, { 12, 41 }, + { 10, 41 }, { 2, 48 }, { 12, 41 }, { 13, 41 }, + { 0, 59 }, { 3, 50 }, { 19, 40 }, { 3, 66 }, + { 18, 50 }, + + /* 338 - 398 */ + { 19, -6 }, { 18, -6 }, { 14, 0 }, { 26, -12 }, + { 31, -16 }, { 33, -25 }, { 33, -22 }, { 37, -28 }, + { 39, -30 }, { 42, -30 }, { 47, -42 }, { 45, -36 }, + { 49, -34 }, { 41, -17 }, { 32, 9 }, { 69, -71 }, + { 63, -63 }, { 66, -64 }, { 77, 
-74 }, { 54, -39 }, + { 52, -35 }, { 41, -10 }, { 36, 0 }, { 40, -1 }, + { 30, 14 }, { 28, 26 }, { 23, 37 }, { 12, 55 }, + { 11, 65 }, { 37, -33 }, { 39, -36 }, { 40, -37 }, + { 38, -30 }, { 46, -33 }, { 42, -30 }, { 40, -24 }, + { 49, -29 }, { 38, -12 }, { 40, -10 }, { 38, -3 }, + { 46, -5 }, { 31, 20 }, { 29, 30 }, { 25, 44 }, + { 12, 48 }, { 11, 49 }, { 26, 45 }, { 22, 22 }, + { 23, 22 }, { 27, 21 }, { 33, 20 }, { 26, 28 }, + { 30, 24 }, { 27, 34 }, { 18, 42 }, { 25, 39 }, + { 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 }, + { 11, 83 }, + + /* 399 - 435 */ + { 25, 32 }, { 21, 49 }, { 21, 54 }, + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, + { -2, 52 }, { -9, 57 }, { -6, 63 }, { -4, 65 }, + { -4, 67 }, { -7, 82 }, + + /* 436 - 459 */ + { -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 }, + { -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 }, + { -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 }, + { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + }, + + /* i_cabac_init_idc == 2 */ + { + /* 0 - 10 */ + { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, + { 2, 54 }, { 3, 74 }, { -28, 127 }, { -23, 104 }, + { -6, 53 }, { -1, 54 }, { 7, 51 }, + + /* 11 - 23 */ + { 29, 16 }, { 25, 0 }, { 14, 0 }, { -10, 51 }, + { -3, 62 }, { -27, 99 }, { 26, 16 }, { -4, 85 }, + { -24, 102 }, { 5, 57 }, { 6, 57 }, { -17, 73 }, + { 14, 57 }, + + /* 24 - 39 */ + { 20, 40 }, { 20, 10 }, { 29, 0 }, { 54, 0 }, + { 37, 42 }, { 12, 97 }, { -32, 127 }, { -22, 117 }, + { -2, 74 }, { -4, 85 }, { -24, 102 }, { 5, 57 }, + { -6, 93 }, { -14, 88 }, { -6, 44 }, { 4, 55 }, + + /* 40 - 53 */ + { -11, 89 },{ -15, 103 },{ 
-21, 116 },{ 19, 57 }, + { 20, 58 },{ 4, 84 },{ 6, 96 },{ 1, 63 }, + { -5, 85 },{ -13, 106 },{ 5, 63 },{ 6, 75 }, + { -3, 90 },{ -1, 101 }, + + /* 54 - 59 */ + { 3, 55 },{ -4, 79 },{ -2, 75 },{ -12, 97 }, + { -7, 50 },{ 1, 60 }, + + /* 60 - 69 */ + { 0, 41 }, { 0, 63 }, { 0, 63 }, { 0, 63 }, + { -9, 83 }, { 4, 86 }, { 0, 97 }, { -7, 72 }, + { 13, 41 }, { 3, 62 }, + + /* 70 - 104 */ + { 7, 34 }, { -9, 88 }, { -20, 127 }, { -36, 127 }, + { -17, 91 }, { -14, 95 }, { -25, 84 }, { -25, 86 }, + { -12, 89 }, { -17, 91 }, { -31, 127 }, { -14, 76 }, + { -18, 103 }, { -13, 90 }, { -37, 127 }, { 11, 80 }, + { 5, 76 }, { 2, 84 }, { 5, 78 }, { -6, 55 }, + { 4, 61 }, { -14, 83 }, { -37, 127 }, { -5, 79 }, + { -11, 104 }, { -11, 91 }, { -30, 127 }, { 0, 65 }, + { -2, 79 }, { 0, 72 }, { -4, 92 }, { -6, 56 }, + { 3, 68 }, { -8, 71 }, { -13, 98 }, + + /* 105 -> 165 */ + { -4, 86 }, { -12, 88 }, { -5, 82 }, { -3, 72 }, + { -4, 67 }, { -8, 72 }, { -16, 89 }, { -9, 69 }, + { -1, 59 }, { 5, 66 }, { 4, 57 }, { -4, 71 }, + { -2, 71 }, { 2, 58 }, { -1, 74 }, { -4, 44 }, + { -1, 69 }, { 0, 62 }, { -7, 51 }, { -4, 47 }, + { -6, 42 }, { -3, 41 }, { -6, 53 }, { 8, 76 }, + { -9, 78 }, { -11, 83 }, { 9, 52 }, { 0, 67 }, + { -5, 90 }, { 1, 67 }, { -15, 72 }, { -5, 75 }, + { -8, 80 }, { -21, 83 }, { -21, 64 }, { -13, 31 }, + { -25, 64 }, { -29, 94 }, { 9, 75 }, { 17, 63 }, + { -8, 74 }, { -5, 35 }, { -2, 27 }, { 13, 91 }, + { 3, 65 }, { -7, 69 }, { 8, 77 }, { -10, 66 }, + { 3, 62 }, { -3, 68 }, { -20, 81 }, { 0, 30 }, + { 1, 7 }, { -3, 23 }, { -21, 74 }, { 16, 66 }, + { -23, 124 }, { 17, 37 }, { 44, -18 }, { 50, -34 }, + { -22, 127 }, + + /* 166 - 226 */ + { 4, 39 }, { 0, 42 }, { 7, 34 }, { 11, 29 }, + { 8, 31 }, { 6, 37 }, { 7, 42 }, { 3, 40 }, + { 8, 33 }, { 13, 43 }, { 13, 36 }, { 4, 47 }, + { 3, 55 }, { 2, 58 }, { 6, 60 }, { 8, 44 }, + { 11, 44 }, { 14, 42 }, { 7, 48 }, { 4, 56 }, + { 4, 52 }, { 13, 37 }, { 9, 49 }, { 19, 58 }, + { 10, 48 }, { 12, 45 }, { 0, 69 }, { 20, 33 }, + { 8, 63 }, { 
35, -18 }, { 33, -25 }, { 28, -3 }, + { 24, 10 }, { 27, 0 }, { 34, -14 }, { 52, -44 }, + { 39, -24 }, { 19, 17 }, { 31, 25 }, { 36, 29 }, + { 24, 33 }, { 34, 15 }, { 30, 20 }, { 22, 73 }, + { 20, 34 }, { 19, 31 }, { 27, 44 }, { 19, 16 }, + { 15, 36 }, { 15, 36 }, { 21, 28 }, { 25, 21 }, + { 30, 20 }, { 31, 12 }, { 27, 16 }, { 24, 42 }, + { 0, 93 }, { 14, 56 }, { 15, 57 }, { 26, 38 }, + { -24, 127 }, + + /* 227 - 275 */ + { -24, 115 }, { -22, 82 }, { -9, 62 }, { 0, 53 }, + { 0, 59 }, { -14, 85 }, { -13, 89 }, { -13, 94 }, + { -11, 92 }, { -29, 127 }, { -21, 100 }, { -14, 57 }, + { -12, 67 }, { -11, 71 }, { -10, 77 }, { -21, 85 }, + { -16, 88 }, { -23, 104 }, { -15, 98 }, { -37, 127 }, + { -10, 82 }, { -8, 48 }, { -8, 61 }, { -8, 66 }, + { -7, 70 }, { -14, 75 }, { -10, 79 }, { -9, 83 }, + { -12, 92 }, { -18, 108 }, { -4, 79 }, { -22, 69 }, + { -16, 75 }, { -2, 58 }, { 1, 58 }, { -13, 78 }, + { -9, 83 }, { -4, 81 }, { -13, 99 }, { -13, 81 }, + { -6, 38 }, { -13, 62 }, { -6, 58 }, { -2, 59 }, + { -16, 73 }, { -10, 76 }, { -13, 86 }, { -9, 83 }, + { -10, 87 }, + + /* 276 a bit special (not used, bypass is used instead) */ + { 0, 0 }, + + /* 277 - 337 */ + { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 }, + { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 }, + { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 }, + { -10, 94 }, { -15, 102 }, { -10, 99 }, { -13, 106 }, + { -50, 127 }, { -5, 92 }, { 17, 57 }, { -5, 86 }, + { -13, 94 }, { -12, 91 }, { -2, 77 }, { 0, 71 }, + { -1, 73 }, { 4, 64 }, { -7, 81 }, { 5, 64 }, + { 15, 57 }, { 1, 67 }, { 0, 68 }, { -10, 67 }, + { 1, 68 }, { 0, 77 }, { 2, 64 }, { 0, 68 }, + { -5, 78 }, { 7, 55 }, { 5, 59 }, { 2, 65 }, + { 14, 54 }, { 15, 44 }, { 5, 60 }, { 2, 70 }, + { -2, 76 }, { -18, 86 }, { 12, 70 }, { 5, 64 }, + { -12, 70 }, { 11, 55 }, { 5, 56 }, { 0, 69 }, + { 2, 65 }, { -6, 74 }, { 5, 54 }, { 7, 54 }, + { -6, 76 }, { -11, 82 }, { -2, 77 }, { -2, 77 }, + { 25, 42 }, + + /* 338 - 398 */ + { 17, -13 }, { 16, -9 }, { 
17, -12 }, { 27, -21 }, + { 37, -30 }, { 41, -40 }, { 42, -41 }, { 48, -47 }, + { 39, -32 }, { 46, -40 }, { 52, -51 }, { 46, -41 }, + { 52, -39 }, { 43, -19 }, { 32, 11 }, { 61, -55 }, + { 56, -46 }, { 62, -50 }, { 81, -67 }, { 45, -20 }, + { 35, -2 }, { 28, 15 }, { 34, 1 }, { 39, 1 }, + { 30, 17 }, { 20, 38 }, { 18, 45 }, { 15, 54 }, + { 0, 79 }, { 36, -16 }, { 37, -14 }, { 37, -17 }, + { 32, 1 }, { 34, 15 }, { 29, 15 }, { 24, 25 }, + { 34, 22 }, { 31, 16 }, { 35, 18 }, { 31, 28 }, + { 33, 41 }, { 36, 28 }, { 27, 47 }, { 21, 62 }, + { 18, 31 }, { 19, 26 }, { 36, 24 }, { 24, 23 }, + { 27, 16 }, { 24, 30 }, { 31, 29 }, { 22, 41 }, + { 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 }, + { 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 }, + { 25, 61 }, + + /* 399 - 435 */ + { 21, 33 }, { 19, 50 }, { 17, 61 }, + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, + { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, + { -6, 68 }, { -10, 79 }, + + /* 436 - 459 */ + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + } +}; diff --git a/mpeg4/src/libavcodec/h264idct.c b/mpeg4/src/libavcodec/h264idct.c new file mode 100644 index 0000000000000000000000000000000000000000..3e44385d5ead65583f3a3a53c21e606b9dba357c --- /dev/null +++ b/mpeg4/src/libavcodec/h264idct.c @@ -0,0 +1,166 @@ +/* + * H.264 IDCT + * Copyright (c) 2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the 
terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file h264idct.c
+ * H.264 IDCT.
+ * @author Michael Niedermayer
+ */
+
+#include "dsputil.h"
+
+/**
+ * Shared 4x4 inverse-transform core used by the 4x4 entry points below.
+ *
+ * Pass 1 transforms each of the 4 rows of block in place. Pass 2 transforms
+ * each of the 4 columns, shifts the result right by shift and stores it in a
+ * 4x4 area of dst through the cm lookup.
+ *
+ * @param dst          top-left pixel of the 4x4 destination area
+ * @param block        coefficients; modified in place (DC bias and pass 1)
+ * @param stride       offset between vertically adjacent pixels of dst
+ * @param block_stride offset, in elements, between successive rows of block
+ * @param shift        right shift applied to every pass-2 output
+ * @param add          1: the result is added to the pixel already in dst,
+ *                     0: the pixel already in dst is ignored (overwrite)
+ */
+static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
+    int i;
+    /* NOTE(review): cropTbl/MAX_NEG_CROP are declared outside this file;
+     * presumably a clamp-to-pixel-range table biased so that negative
+     * indices are valid -- confirm against dsputil. */
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* Rounding bias: block[0] reaches all 16 outputs with weight 1 in both
+     * passes, so adding half of (1<<shift) here rounds every final >>shift. */
+    block[0] += 1<<(shift-1);
+
+    /* Pass 1: i selects a row; the 4 coefficients of that row are replaced. */
+    for(i=0; i<4; i++){
+        const int z0= block[0 + block_stride*i] + block[2 + block_stride*i];
+        const int z1= block[0 + block_stride*i] - block[2 + block_stride*i];
+        const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i];
+        const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1);
+
+        block[0 + block_stride*i]= z0 + z3;
+        block[1 + block_stride*i]= z1 + z2;
+        block[2 + block_stride*i]= z1 - z2;
+        block[3 + block_stride*i]= z0 - z3;
+    }
+
+    /* Pass 2: i selects a column; the 4 results go to dst column i, rows 0..3. */
+    for(i=0; i<4; i++){
+        const int z0= block[i + block_stride*0] + block[i + block_stride*2];
+        const int z1= block[i + block_stride*0] - block[i + block_stride*2];
+        const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3];
+        const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1);
+
+        dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ];
+        dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ];
+        dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ];
+        dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ];
+    }
+}
+
+/**
+ * 4x4 inverse transform added to dst: rows of block are 4 elements apart,
+ * the result is shifted right by 6. block is modified.
+ */
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    idct_internal(dst, block, stride, 4, 6, 1);
+}
+
+/**
+ * 4x4 inverse transform added to dst, reading block with rows 8 elements
+ * apart and shifting the result right by 3. block is modified.
+ * Note the argument order (dst, stride, block) differs from
+ * ff_h264_idct_add_c().
+ */
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 1);
+}
+
+/**
+ * Same transform as ff_h264_lowres_idct_add_c(), but the previous content of
+ * dst is not added in (add == 0). block is modified.
+ */
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
+    idct_internal(dst, block, stride, 8, 3, 0);
+}
+
+/**
+ * 8x8 inverse transform added to dst.
+ *
+ * The first loop transforms every row of the 8x8 block in place, the second
+ * loop transforms every column, shifts right by 6 and adds the result to the
+ * matching dst pixel through the cm lookup. In each pass the even-indexed
+ * inputs (0,2,4,6) form b0,b2,b4,b6 and the odd-indexed inputs (1,3,5,7) form
+ * b1,b3,b5,b7; the 8 outputs are the mirrored sums/differences of those.
+ *
+ * @param dst    top-left pixel of the 8x8 destination area
+ * @param block  64 coefficients, 8 per row; modified in place
+ * @param stride offset between vertically adjacent pixels of dst
+ */
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i;
+    DCTELEM (*src)[8] = (DCTELEM(*)[8])block; /* view block as src[row][column] */
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+
+    /* Rounding bias (half of 1<<6) for the final >>6; block[0] reaches every
+     * output with weight 1. */
+    block[0] += 32;
+
+    /* Row pass, results written back into src[i][0..7]. */
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 = src[i][0] + src[i][4];
+        const int a2 = src[i][0] - src[i][4];
+        const int a4 = (src[i][2]>>1) - src[i][6];
+        const int a6 = (src[i][6]>>1) + src[i][2];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1);
+        const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
+        const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
+        const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 = a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 = a7 - (a1>>2);
+
+        src[i][0] = b0 + b7;
+        src[i][7] = b0 - b7;
+        src[i][1] = b2 + b5;
+        src[i][6] = b2 - b5;
+        src[i][2] = b4 + b3;
+        src[i][5] = b4 - b3;
+        src[i][3] = b6 + b1;
+        src[i][4] = b6 - b1;
+    }
+    /* Column pass, results added to dst column i, rows 0..7. */
+    for( i = 0; i < 8; i++ )
+    {
+        const int a0 = src[0][i] + src[4][i];
+        const int a2 = src[0][i] - src[4][i];
+        const int a4 = (src[2][i]>>1) - src[6][i];
+        const int a6 = (src[6][i]>>1) + src[2][i];
+
+        const int b0 = a0 + a6;
+        const int b2 = a2 + a4;
+        const int b4 = a2 - a4;
+        const int b6 = a0 - a6;
+
+        const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
+        const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
+        const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
+        const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
+
+        const int b1 = (a7>>2) + a1;
+        const int b3 = a3 + (a5>>2);
+        const int b5 = (a3>>2) - a5;
+        const int b7 = a7 - (a1>>2);
+
+        dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ];
+        dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ];
+        dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ];
+        dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ];
+        dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ];
+        dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ];
+        dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ];
+        dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ];
+    }
+}
+
+/**
+ * DC-only 4x4 case: adds the rounded DC value (block[0] + 32) >> 6 to each of
+ * the 4x4 pixels at dst through the cm lookup. Only block[0] is read and
+ * block is not modified.
+ */
+// assumes all AC coefs are 0
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    int dc = (block[0] + 32) >> 6;
+    for( j = 0; j < 4; j++ )
+    {
+        for( i = 0; i < 4; i++ )
+            dst[i] = cm[ dst[i] + dc ];
+        dst += stride; /* next row of the destination */
+    }
+}
+
+/**
+ * DC-only 8x8 case: same as ff_h264_idct_dc_add_c() but covering 8x8 pixels.
+ */
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
+    int i, j;
+    uint8_t *cm = cropTbl + MAX_NEG_CROP;
+    int dc = (block[0] + 32) >> 6;
+    for( j = 0; j < 8; j++ )
+    {
+        for( i = 0; i < 8; i++ )
+            dst[i] = cm[ dst[i] + dc ];
+        dst += stride; /* next row of the destination */
+    }
+}
diff --git a/mpeg4/src/libavcodec/huffyuv.c b/mpeg4/src/libavcodec/huffyuv.c
new file mode 100644
index 0000000000000000000000000000000000000000..d65943fcc161a34eb2ff4d6ca470486aa73e3a27
--- /dev/null
+++ b/mpeg4/src/libavcodec/huffyuv.c
@@ -0,0 +1,1270 @@
+/*
+ * huffyuv codec for libavcodec
+ *
+ * Copyright (c) 2002-2003 Michael Niedermayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of + * the algorithm used + */ + +/** + * @file huffyuv.c + * huffyuv codec for libavcodec. + */ + +#include "common.h" +#include "bitstream.h" +#include "avcodec.h" +#include "dsputil.h" + +#define VLC_BITS 11 + +#ifdef WORDS_BIGENDIAN +#define B 3 +#define G 2 +#define R 1 +#else +#define B 0 +#define G 1 +#define R 2 +#endif + +typedef enum Predictor{ + LEFT= 0, + PLANE, + MEDIAN, +} Predictor; + +typedef struct HYuvContext{ + AVCodecContext *avctx; + Predictor predictor; + GetBitContext gb; + PutBitContext pb; + int interlaced; + int decorrelate; + int bitstream_bpp; + int version; + int yuy2; //use yuy2 instead of 422P + int bgr32; //use bgr32 instead of bgr24 + int width, height; + int flags; + int context; + int picture_number; + int last_slice_end; + uint8_t *temp[3]; + uint64_t stats[3][256]; + uint8_t len[3][256]; + uint32_t bits[3][256]; + VLC vlc[3]; + AVFrame picture; + uint8_t *bitstream_buffer; + unsigned int bitstream_buffer_size; + DSPContext dsp; +}HYuvContext; + +static const unsigned char classic_shift_luma[] = { + 34,36,35,69,135,232,9,16,10,24,11,23,12,16,13,10,14,8,15,8, + 16,8,17,20,16,10,207,206,205,236,11,8,10,21,9,23,8,8,199,70, + 69,68, 0 +}; + +static const unsigned char classic_shift_chroma[] = { + 66,36,37,38,39,40,41,75,76,77,110,239,144,81,82,83,84,85,118,183, + 56,57,88,89,56,89,154,57,58,57,26,141,57,56,58,57,58,57,184,119, + 
214,245,116,83,82,49,80,79,78,77,44,75,41,40,39,38,37,36,34, 0 +}; + +static const unsigned char classic_add_luma[256] = { + 3, 9, 5, 12, 10, 35, 32, 29, 27, 50, 48, 45, 44, 41, 39, 37, + 73, 70, 68, 65, 64, 61, 58, 56, 53, 50, 49, 46, 44, 41, 38, 36, + 68, 65, 63, 61, 58, 55, 53, 51, 48, 46, 45, 43, 41, 39, 38, 36, + 35, 33, 32, 30, 29, 27, 26, 25, 48, 47, 46, 44, 43, 41, 40, 39, + 37, 36, 35, 34, 32, 31, 30, 28, 27, 26, 24, 23, 22, 20, 19, 37, + 35, 34, 33, 31, 30, 29, 27, 26, 24, 23, 21, 20, 18, 17, 15, 29, + 27, 26, 24, 22, 21, 19, 17, 16, 14, 26, 25, 23, 21, 19, 18, 16, + 15, 27, 25, 23, 21, 19, 17, 16, 14, 26, 25, 23, 21, 18, 17, 14, + 12, 17, 19, 13, 4, 9, 2, 11, 1, 7, 8, 0, 16, 3, 14, 6, + 12, 10, 5, 15, 18, 11, 10, 13, 15, 16, 19, 20, 22, 24, 27, 15, + 18, 20, 22, 24, 26, 14, 17, 20, 22, 24, 27, 15, 18, 20, 23, 25, + 28, 16, 19, 22, 25, 28, 32, 36, 21, 25, 29, 33, 38, 42, 45, 49, + 28, 31, 34, 37, 40, 42, 44, 47, 49, 50, 52, 54, 56, 57, 59, 60, + 62, 64, 66, 67, 69, 35, 37, 39, 40, 42, 43, 45, 47, 48, 51, 52, + 54, 55, 57, 59, 60, 62, 63, 66, 67, 69, 71, 72, 38, 40, 42, 43, + 46, 47, 49, 51, 26, 28, 30, 31, 33, 34, 18, 19, 11, 13, 7, 8, +}; + +static const unsigned char classic_add_chroma[256] = { + 3, 1, 2, 2, 2, 2, 3, 3, 7, 5, 7, 5, 8, 6, 11, 9, + 7, 13, 11, 10, 9, 8, 7, 5, 9, 7, 6, 4, 7, 5, 8, 7, + 11, 8, 13, 11, 19, 15, 22, 23, 20, 33, 32, 28, 27, 29, 51, 77, + 43, 45, 76, 81, 46, 82, 75, 55, 56,144, 58, 80, 60, 74,147, 63, + 143, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 27, 30, 21, 22, + 17, 14, 5, 6,100, 54, 47, 50, 51, 53,106,107,108,109,110,111, + 112,113,114,115, 4,117,118, 92, 94,121,122, 3,124,103, 2, 1, + 0,129,130,131,120,119,126,125,136,137,138,139,140,141,142,134, + 135,132,133,104, 64,101, 62, 57,102, 95, 93, 59, 61, 28, 97, 96, + 52, 49, 48, 29, 32, 25, 24, 46, 23, 98, 45, 44, 43, 20, 42, 41, + 19, 18, 99, 40, 15, 39, 38, 16, 13, 12, 11, 37, 10, 9, 8, 36, + 
7,128,127,105,123,116, 35, 34, 33,145, 31, 79, 42,146, 78, 26, + 83, 48, 49, 50, 44, 47, 26, 31, 30, 18, 17, 19, 21, 24, 25, 13, + 14, 16, 17, 18, 20, 21, 12, 14, 15, 9, 10, 6, 9, 6, 5, 8, + 6, 12, 8, 10, 7, 9, 6, 4, 6, 2, 2, 3, 3, 3, 3, 2, +}; + +static inline int add_left_prediction(uint8_t *dst, uint8_t *src, int w, int acc){ + int i; + + for(i=0; idsp.diff_bytes(dst+16, src+16, src+15, w-16); + return src[w-1]; + } +} + +static void read_len_table(uint8_t *dst, GetBitContext *gb){ + int i, val, repeat; + + for(i=0; i<256;){ + repeat= get_bits(gb, 3); + val = get_bits(gb, 5); + if(repeat==0) + repeat= get_bits(gb, 8); +//printf("%d %d\n", val, repeat); + while (repeat--) + dst[i++] = val; + } +} + +static int generate_bits_table(uint32_t *dst, uint8_t *len_table){ + int len, index; + uint32_t bits=0; + + for(len=32; len>0; len--){ + for(index=0; index<256; index++){ + if(len_table[index]==len) + dst[index]= bits++; + } + if(bits & 1){ + av_log(NULL, AV_LOG_ERROR, "Error generating huffman table\n"); + return -1; + } + bits >>= 1; + } + return 0; +} + +static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){ + uint64_t counts[2*size]; + int up[2*size]; + int offset, i, next; + + for(offset=1; ; offset<<=1){ + for(i=0; i counts[i]){ + if(min1 > counts[i]){ + min2= min1; + min2_i= min1_i; + min1= counts[i]; + min1_i= i; + }else{ + min2= counts[i]; + min2_i= i; + } + } + } + + if(min2==INT64_MAX) break; + + counts[next]= min1 + min2; + counts[min1_i]= + counts[min2_i]= INT64_MAX; + up[min1_i]= + up[min2_i]= next; + up[next]= -1; + } + + for(i=0; i= 32) break; + + dst[i]= len; + } + if(i==size) break; + } +} + +static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){ + GetBitContext gb; + int i; + + init_get_bits(&gb, src, length*8); + + for(i=0; i<3; i++){ + read_len_table(s->len[i], &gb); + + if(generate_bits_table(s->bits[i], s->len[i])<0){ + return -1; + } +#if 0 +for(j=0; j<256; j++){ +printf("%6X, %2d, %3d\n", s->bits[i][j], 
s->len[i][j], j); +} +#endif + free_vlc(&s->vlc[i]); + init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0); + } + + return (get_bits_count(&gb)+7)/8; +} + +static int read_old_huffman_tables(HYuvContext *s){ +#if 1 + GetBitContext gb; + int i; + + init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8); + read_len_table(s->len[0], &gb); + init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8); + read_len_table(s->len[1], &gb); + + for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma [i]; + for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i]; + + if(s->bitstream_bpp >= 24){ + memcpy(s->bits[1], s->bits[0], 256*sizeof(uint32_t)); + memcpy(s->len[1] , s->len [0], 256*sizeof(uint8_t)); + } + memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t)); + memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t)); + + for(i=0; i<3; i++){ + free_vlc(&s->vlc[i]); + init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0); + } + + return 0; +#else + av_log(s->avctx, AV_LOG_DEBUG, "v1 huffyuv is not supported \n"); + return -1; +#endif +} + +static void alloc_temp(HYuvContext *s){ + int i; + + if(s->bitstream_bpp<24){ + for(i=0; i<3; i++){ + s->temp[i]= av_malloc(s->width + 16); + } + }else{ + s->temp[0]= av_malloc(4*s->width + 16); + } +} + +static int common_init(AVCodecContext *avctx){ + HYuvContext *s = avctx->priv_data; + + s->avctx= avctx; + s->flags= avctx->flags; + + dsputil_init(&s->dsp, avctx); + + s->width= avctx->width; + s->height= avctx->height; + assert(s->width>0 && s->height>0); + + return 0; +} + +static int decode_init(AVCodecContext *avctx) +{ + HYuvContext *s = avctx->priv_data; + + common_init(avctx); + memset(s->vlc, 0, 3*sizeof(VLC)); + + avctx->coded_frame= &s->picture; + s->interlaced= s->height > 288; + +s->bgr32=1; +//if(avctx->extradata) +// printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size); + if(avctx->extradata_size){ + 
if((avctx->bits_per_sample&7) && avctx->bits_per_sample != 12) + s->version=1; // do such files exist at all? + else + s->version=2; + }else + s->version=0; + + if(s->version==2){ + int method, interlace; + + method= ((uint8_t*)avctx->extradata)[0]; + s->decorrelate= method&64 ? 1 : 0; + s->predictor= method&63; + s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1]; + if(s->bitstream_bpp==0) + s->bitstream_bpp= avctx->bits_per_sample&~7; + interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4; + s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced; + s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 1 : 0; + + if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0) + return -1; + }else{ + switch(avctx->bits_per_sample&7){ + case 1: + s->predictor= LEFT; + s->decorrelate= 0; + break; + case 2: + s->predictor= LEFT; + s->decorrelate= 1; + break; + case 3: + s->predictor= PLANE; + s->decorrelate= avctx->bits_per_sample >= 24; + break; + case 4: + s->predictor= MEDIAN; + s->decorrelate= 0; + break; + default: + s->predictor= LEFT; //OLD + s->decorrelate= 0; + break; + } + s->bitstream_bpp= avctx->bits_per_sample & ~7; + s->context= 0; + + if(read_old_huffman_tables(s) < 0) + return -1; + } + + switch(s->bitstream_bpp){ + case 12: + avctx->pix_fmt = PIX_FMT_YUV420P; + break; + case 16: + if(s->yuy2){ + avctx->pix_fmt = PIX_FMT_YUV422; + }else{ + avctx->pix_fmt = PIX_FMT_YUV422P; + } + break; + case 24: + case 32: + if(s->bgr32){ + avctx->pix_fmt = PIX_FMT_RGBA32; + }else{ + avctx->pix_fmt = PIX_FMT_BGR24; + } + break; + default: + assert(0); + } + + alloc_temp(s); + +// av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced); + + return 0; +} + +static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){ + int i; + int index= 0; + + for(i=0; i<256;){ + int val= len[i]; + int repeat=0; + + for(; i<256 && len[i]==val && repeat<255; i++) + 
repeat++; + + assert(val < 32 && val >0 && repeat<256 && repeat>0); + if(repeat>7){ + buf[index++]= val; + buf[index++]= repeat; + }else{ + buf[index++]= val | (repeat<<5); + } + } + + return index; +} + +static int encode_init(AVCodecContext *avctx) +{ + HYuvContext *s = avctx->priv_data; + int i, j; + + common_init(avctx); + + avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772 + avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132 + s->version=2; + + avctx->coded_frame= &s->picture; + + switch(avctx->pix_fmt){ + case PIX_FMT_YUV420P: + s->bitstream_bpp= 12; + break; + case PIX_FMT_YUV422P: + s->bitstream_bpp= 16; + break; + default: + av_log(avctx, AV_LOG_ERROR, "format not supported\n"); + return -1; + } + avctx->bits_per_sample= s->bitstream_bpp; + s->decorrelate= s->bitstream_bpp >= 24; + s->predictor= avctx->prediction_method; + s->interlaced= avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0; + if(avctx->context_model==1){ + s->context= avctx->context_model; + if(s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){ + av_log(avctx, AV_LOG_ERROR, "context=1 is not compatible with 2 pass huffyuv encoding\n"); + return -1; + } + }else s->context= 0; + + if(avctx->codec->id==CODEC_ID_HUFFYUV){ + if(avctx->pix_fmt==PIX_FMT_YUV420P){ + av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n"); + return -1; + } + if(avctx->context_model){ + av_log(avctx, AV_LOG_ERROR, "Error: per-frame huffman tables are not supported by huffyuv; use vcodec=ffvhuff\n"); + return -1; + } + if(s->interlaced != ( s->height > 288 )) + av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n"); + } + + ((uint8_t*)avctx->extradata)[0]= s->predictor; + ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp; + ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 
0x10 : 0x20; + if(s->context) + ((uint8_t*)avctx->extradata)[2]|= 0x40; + ((uint8_t*)avctx->extradata)[3]= 0; + s->avctx->extradata_size= 4; + + if(avctx->stats_in){ + char *p= avctx->stats_in; + + for(i=0; i<3; i++) + for(j=0; j<256; j++) + s->stats[i][j]= 1; + + for(;;){ + for(i=0; i<3; i++){ + char *next; + + for(j=0; j<256; j++){ + s->stats[i][j]+= strtol(p, &next, 0); + if(next==p) return -1; + p=next; + } + } + if(p[0]==0 || p[1]==0 || p[2]==0) break; + } + }else{ + for(i=0; i<3; i++) + for(j=0; j<256; j++){ + int d= FFMIN(j, 256-j); + + s->stats[i][j]= 100000000/(d+1); + } + } + + for(i=0; i<3; i++){ + generate_len_table(s->len[i], s->stats[i], 256); + + if(generate_bits_table(s->bits[i], s->len[i])<0){ + return -1; + } + + s->avctx->extradata_size+= + store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]); + } + + if(s->context){ + for(i=0; i<3; i++){ + int pels = s->width*s->height / (i?40:10); + for(j=0; j<256; j++){ + int d= FFMIN(j, 256-j); + s->stats[i][j]= pels/(d+1); + } + } + }else{ + for(i=0; i<3; i++) + for(j=0; j<256; j++) + s->stats[i][j]= 0; + } + +// printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced); + + alloc_temp(s); + + s->picture_number=0; + + return 0; +} + +static void decode_422_bitstream(HYuvContext *s, int count){ + int i; + + count/=2; + + for(i=0; itemp[0][2*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[1][ i ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[2][ i ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); + } +} + +static void decode_gray_bitstream(HYuvContext *s, int count){ + int i; + + count/=2; + + for(i=0; itemp[0][2*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + } +} + +static int encode_422_bitstream(HYuvContext *s, int count){ + int i; + + 
if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + + count/=2; + if(s->flags&CODEC_FLAG_PASS1){ + for(i=0; istats[0][ s->temp[0][2*i ] ]++; + s->stats[1][ s->temp[1][ i ] ]++; + s->stats[0][ s->temp[0][2*i+1] ]++; + s->stats[2][ s->temp[2][ i ] ]++; + } + } + if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT) + return 0; + if(s->context){ + for(i=0; istats[0][ s->temp[0][2*i ] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + s->stats[1][ s->temp[1][ i ] ]++; + put_bits(&s->pb, s->len[1][ s->temp[1][ i ] ], s->bits[1][ s->temp[1][ i ] ]); + s->stats[0][ s->temp[0][2*i+1] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + s->stats[2][ s->temp[2][ i ] ]++; + put_bits(&s->pb, s->len[2][ s->temp[2][ i ] ], s->bits[2][ s->temp[2][ i ] ]); + } + }else{ + for(i=0; ipb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + put_bits(&s->pb, s->len[1][ s->temp[1][ i ] ], s->bits[1][ s->temp[1][ i ] ]); + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + put_bits(&s->pb, s->len[2][ s->temp[2][ i ] ], s->bits[2][ s->temp[2][ i ] ]); + } + } + return 0; +} + +static int encode_gray_bitstream(HYuvContext *s, int count){ + int i; + + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + + count/=2; + if(s->flags&CODEC_FLAG_PASS1){ + for(i=0; istats[0][ s->temp[0][2*i ] ]++; + s->stats[0][ s->temp[0][2*i+1] ]++; + } + } + if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT) + return 0; + + if(s->context){ + for(i=0; istats[0][ s->temp[0][2*i ] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + s->stats[0][ s->temp[0][2*i+1] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + } + }else{ + for(i=0; 
ipb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + } + } + return 0; +} + +static void decode_bgr_bitstream(HYuvContext *s, int count){ + int i; + + if(s->decorrelate){ + if(s->bitstream_bpp==24){ + for(i=0; itemp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + } + }else{ + for(i=0; itemp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?! + } + } + }else{ + if(s->bitstream_bpp==24){ + for(i=0; itemp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); + } + }else{ + for(i=0; itemp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); + get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?! 
+ } + } + } +} + +static void draw_slice(HYuvContext *s, int y){ + int h, cy; + int offset[4]; + + if(s->avctx->draw_horiz_band==NULL) + return; + + h= y - s->last_slice_end; + y -= h; + + if(s->bitstream_bpp==12){ + cy= y>>1; + }else{ + cy= y; + } + + offset[0] = s->picture.linesize[0]*y; + offset[1] = s->picture.linesize[1]*cy; + offset[2] = s->picture.linesize[2]*cy; + offset[3] = 0; + emms_c(); + + s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h); + + s->last_slice_end= y + h; +} + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ + HYuvContext *s = avctx->priv_data; + const int width= s->width; + const int width2= s->width>>1; + const int height= s->height; + int fake_ystride, fake_ustride, fake_vstride; + AVFrame * const p= &s->picture; + int table_size= 0; + + AVFrame *picture = data; + + s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE); + + s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4); + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + if(s->context){ + table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size); + if(table_size < 0) + return -1; + } + + if((unsigned)(buf_size-table_size) >= INT_MAX/8) + return -1; + + init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8); + + fake_ystride= s->interlaced ? p->linesize[0]*2 : p->linesize[0]; + fake_ustride= s->interlaced ? p->linesize[1]*2 : p->linesize[1]; + fake_vstride= s->interlaced ? 
p->linesize[2]*2 : p->linesize[2]; + + s->last_slice_end= 0; + + if(s->bitstream_bpp<24){ + int y, cy; + int lefty, leftu, leftv; + int lefttopy, lefttopu, lefttopv; + + if(s->yuy2){ + p->data[0][3]= get_bits(&s->gb, 8); + p->data[0][2]= get_bits(&s->gb, 8); + p->data[0][1]= get_bits(&s->gb, 8); + p->data[0][0]= get_bits(&s->gb, 8); + + av_log(avctx, AV_LOG_ERROR, "YUY2 output is not implemented yet\n"); + return -1; + }else{ + + leftv= p->data[2][0]= get_bits(&s->gb, 8); + lefty= p->data[0][1]= get_bits(&s->gb, 8); + leftu= p->data[1][0]= get_bits(&s->gb, 8); + p->data[0][0]= get_bits(&s->gb, 8); + + switch(s->predictor){ + case LEFT: + case PLANE: + decode_422_bitstream(s, width-2); + lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty); + if(!(s->flags&CODEC_FLAG_GRAY)){ + leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu); + leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv); + } + + for(cy=y=1; yheight; y++,cy++){ + uint8_t *ydst, *udst, *vdst; + + if(s->bitstream_bpp==12){ + decode_gray_bitstream(s, width); + + ydst= p->data[0] + p->linesize[0]*y; + + lefty= add_left_prediction(ydst, s->temp[0], width, lefty); + if(s->predictor == PLANE){ + if(y>s->interlaced) + s->dsp.add_bytes(ydst, ydst - fake_ystride, width); + } + y++; + if(y>=s->height) break; + } + + draw_slice(s, y); + + ydst= p->data[0] + p->linesize[0]*y; + udst= p->data[1] + p->linesize[1]*cy; + vdst= p->data[2] + p->linesize[2]*cy; + + decode_422_bitstream(s, width); + lefty= add_left_prediction(ydst, s->temp[0], width, lefty); + if(!(s->flags&CODEC_FLAG_GRAY)){ + leftu= add_left_prediction(udst, s->temp[1], width2, leftu); + leftv= add_left_prediction(vdst, s->temp[2], width2, leftv); + } + if(s->predictor == PLANE){ + if(cy>s->interlaced){ + s->dsp.add_bytes(ydst, ydst - fake_ystride, width); + if(!(s->flags&CODEC_FLAG_GRAY)){ + s->dsp.add_bytes(udst, udst - fake_ustride, width2); + s->dsp.add_bytes(vdst, vdst - fake_vstride, width2); + 
} + } + } + } + draw_slice(s, height); + + break; + case MEDIAN: + /* first line except first 2 pixels is left predicted */ + decode_422_bitstream(s, width-2); + lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty); + if(!(s->flags&CODEC_FLAG_GRAY)){ + leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu); + leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv); + } + + cy=y=1; + + /* second line is left predicted for interlaced case */ + if(s->interlaced){ + decode_422_bitstream(s, width); + lefty= add_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty); + if(!(s->flags&CODEC_FLAG_GRAY)){ + leftu= add_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu); + leftv= add_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv); + } + y++; cy++; + } + + /* next 4 pixels are left predicted too */ + decode_422_bitstream(s, 4); + lefty= add_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty); + if(!(s->flags&CODEC_FLAG_GRAY)){ + leftu= add_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu); + leftv= add_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv); + } + + /* next line except the first 4 pixels is median predicted */ + lefttopy= p->data[0][3]; + decode_422_bitstream(s, width-4); + add_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy); + if(!(s->flags&CODEC_FLAG_GRAY)){ + lefttopu= p->data[1][1]; + lefttopv= p->data[2][1]; + add_median_prediction(p->data[1] + fake_ustride+2, p->data[1]+2, s->temp[1], width2-2, &leftu, &lefttopu); + add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv); + } + y++; cy++; + + for(; ybitstream_bpp==12){ + while(2*cy > y){ + decode_gray_bitstream(s, width); + ydst= p->data[0] + p->linesize[0]*y; + add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); + y++; + 
} + if(y>=height) break; + } + draw_slice(s, y); + + decode_422_bitstream(s, width); + + ydst= p->data[0] + p->linesize[0]*y; + udst= p->data[1] + p->linesize[1]*cy; + vdst= p->data[2] + p->linesize[2]*cy; + + add_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); + if(!(s->flags&CODEC_FLAG_GRAY)){ + add_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu); + add_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv); + } + } + + draw_slice(s, height); + break; + } + } + }else{ + int y; + int leftr, leftg, leftb; + const int last_line= (height-1)*p->linesize[0]; + + if(s->bitstream_bpp==32){ + skip_bits(&s->gb, 8); + leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8); + leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8); + leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8); + }else{ + leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8); + leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8); + leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8); + skip_bits(&s->gb, 8); + } + + if(s->bgr32){ + switch(s->predictor){ + case LEFT: + case PLANE: + decode_bgr_bitstream(s, width-1); + add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb); + + for(y=s->height-2; y>=0; y--){ //yes its stored upside down + decode_bgr_bitstream(s, width); + + add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb); + if(s->predictor == PLANE){ + if((y&s->interlaced)==0 && yheight-1-s->interlaced){ + s->dsp.add_bytes(p->data[0] + p->linesize[0]*y, + p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride); + } + } + } + draw_slice(s, height); // just 1 large slice as this is not possible in reverse order + break; + default: + av_log(avctx, AV_LOG_ERROR, "prediction type not supported!\n"); + } + }else{ + + av_log(avctx, AV_LOG_ERROR, "BGR24 output is not implemented yet\n"); + return -1; + } + } + emms_c(); + + 
*picture= *p; + *data_size = sizeof(AVFrame); + + return (get_bits_count(&s->gb)+31)/32*4 + table_size; +} + +static int common_end(HYuvContext *s){ + int i; + + for(i=0; i<3; i++){ + av_freep(&s->temp[i]); + } + return 0; +} + +static int decode_end(AVCodecContext *avctx) +{ + HYuvContext *s = avctx->priv_data; + int i; + + common_end(s); + av_freep(&s->bitstream_buffer); + + for(i=0; i<3; i++){ + free_vlc(&s->vlc[i]); + } + + return 0; +} + +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + HYuvContext *s = avctx->priv_data; + AVFrame *pict = data; + const int width= s->width; + const int width2= s->width>>1; + const int height= s->height; + const int fake_ystride= s->interlaced ? pict->linesize[0]*2 : pict->linesize[0]; + const int fake_ustride= s->interlaced ? pict->linesize[1]*2 : pict->linesize[1]; + const int fake_vstride= s->interlaced ? pict->linesize[2]*2 : pict->linesize[2]; + AVFrame * const p= &s->picture; + int i, j, size=0; + + *p = *pict; + p->pict_type= FF_I_TYPE; + p->key_frame= 1; + + if(s->context){ + for(i=0; i<3; i++){ + generate_len_table(s->len[i], s->stats[i], 256); + if(generate_bits_table(s->bits[i], s->len[i])<0) + return -1; + size+= store_table(s, s->len[i], &buf[size]); + } + + for(i=0; i<3; i++) + for(j=0; j<256; j++) + s->stats[i][j] >>= 1; + } + + init_put_bits(&s->pb, buf+size, buf_size-size); + + if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){ + int lefty, leftu, leftv, y, cy; + + put_bits(&s->pb, 8, leftv= p->data[2][0]); + put_bits(&s->pb, 8, lefty= p->data[0][1]); + put_bits(&s->pb, 8, leftu= p->data[1][0]); + put_bits(&s->pb, 8, p->data[0][0]); + + lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty); + leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu); + leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv); + + encode_422_bitstream(s, width-2); + + if(s->predictor==MEDIAN){ + int lefttopy, 
lefttopu, lefttopv; + cy=y=1; + if(s->interlaced){ + lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty); + leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu); + leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv); + + encode_422_bitstream(s, width); + y++; cy++; + } + + lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty); + leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu); + leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv); + + encode_422_bitstream(s, 4); + + lefttopy= p->data[0][3]; + lefttopu= p->data[1][1]; + lefttopv= p->data[2][1]; + s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); + encode_422_bitstream(s, width-4); + y++; cy++; + + for(; ybitstream_bpp==12){ + while(2*cy > y){ + ydst= p->data[0] + p->linesize[0]*y; + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + encode_gray_bitstream(s, width); + y++; + } + if(y>=height) break; + } + ydst= p->data[0] + p->linesize[0]*y; + udst= p->data[1] + p->linesize[1]*cy; + vdst= p->data[2] + p->linesize[2]*cy; + + s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); + s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); + s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); + + encode_422_bitstream(s, width); + } + }else{ + for(cy=y=1; ybitstream_bpp==12){ + ydst= p->data[0] + p->linesize[0]*y; + + if(s->predictor == PLANE && s->interlaced < 
y){ + s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); + + lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); + }else{ + lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty); + } + encode_gray_bitstream(s, width); + y++; + if(y>=height) break; + } + + ydst= p->data[0] + p->linesize[0]*y; + udst= p->data[1] + p->linesize[1]*cy; + vdst= p->data[2] + p->linesize[2]*cy; + + if(s->predictor == PLANE && s->interlaced < cy){ + s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); + s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); + s->dsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); + + lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); + leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); + leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv); + }else{ + lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty); + leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu); + leftv= sub_left_prediction(s, s->temp[2], vdst, width2, leftv); + } + + encode_422_bitstream(s, width); + } + } + }else{ + av_log(avctx, AV_LOG_ERROR, "Format not supported!\n"); + } + emms_c(); + + size+= (put_bits_count(&s->pb)+31)/8; + size/= 4; + + if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){ + int j; + char *p= avctx->stats_out; + char *end= p + 1024*30; + for(i=0; i<3; i++){ + for(j=0; j<256; j++){ + snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]); + p+= strlen(p); + s->stats[i][j]= 0; + } + snprintf(p, end-p, "\n"); + p++; + } + } + if(!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)){ + flush_put_bits(&s->pb); + s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size); + avctx->stats_out[0] = '\0'; + } + + s->picture_number++; + + return size*4; +} + +static int encode_end(AVCodecContext *avctx) +{ + HYuvContext *s = avctx->priv_data; + + common_end(s); + + av_freep(&avctx->extradata); + av_freep(&avctx->stats_out); + 
+ return 0; +} + +AVCodec huffyuv_decoder = { + "huffyuv", + CODEC_TYPE_VIDEO, + CODEC_ID_HUFFYUV, + sizeof(HYuvContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND, + NULL +}; + +AVCodec ffvhuff_decoder = { + "ffvhuff", + CODEC_TYPE_VIDEO, + CODEC_ID_FFVHUFF, + sizeof(HYuvContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND, + NULL +}; + +#ifdef CONFIG_ENCODERS + +AVCodec huffyuv_encoder = { + "huffyuv", + CODEC_TYPE_VIDEO, + CODEC_ID_HUFFYUV, + sizeof(HYuvContext), + encode_init, + encode_frame, + encode_end, +}; + +AVCodec ffvhuff_encoder = { + "ffvhuff", + CODEC_TYPE_VIDEO, + CODEC_ID_FFVHUFF, + sizeof(HYuvContext), + encode_init, + encode_frame, + encode_end, +}; + +#endif //CONFIG_ENCODERS diff --git a/mpeg4/src/libavcodec/i386/cputest.c b/mpeg4/src/libavcodec/i386/cputest.c new file mode 100644 index 0000000000000000000000000000000000000000..64656c65a5e087ca7299450ddd779c31dc6f2556 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/cputest.c @@ -0,0 +1,131 @@ +/* Cpu detection code, extracted from mmx.h ((c)1997-99 by H. Dietz + and R. Fisher). Converted to C and improved by Fabrice Bellard */ + +#include +#include "../dsputil.h" + +#ifdef ARCH_X86_64 +# define REG_b "rbx" +# define REG_S "rsi" +#else +# define REG_b "ebx" +# define REG_S "esi" +#endif + +/* ebx saving is necessary for PIC. gcc seems unable to see it alone */ +#define cpuid(index,eax,ebx,ecx,edx)\ + __asm __volatile\ + ("mov %%"REG_b", %%"REG_S"\n\t"\ + "cpuid\n\t"\ + "xchg %%"REG_b", %%"REG_S\ + : "=a" (eax), "=S" (ebx),\ + "=c" (ecx), "=d" (edx)\ + : "0" (index)); + +/* Function to test if multimedia instructions are supported... */ +int mm_support(void) +{ + int rval = 0; + int eax, ebx, ecx, edx; + int max_std_level, max_ext_level, std_caps=0, ext_caps=0; + long a, c; + + __asm__ __volatile__ ( + /* See if CPUID instruction is supported ... */ + /* ... 
Get copies of EFLAGS into eax and ecx */ + "pushf\n\t" + "pop %0\n\t" + "mov %0, %1\n\t" + + /* ... Toggle the ID bit in one copy and store */ + /* to the EFLAGS reg */ + "xor $0x200000, %0\n\t" + "push %0\n\t" + "popf\n\t" + + /* ... Get the (hopefully modified) EFLAGS */ + "pushf\n\t" + "pop %0\n\t" + : "=a" (a), "=c" (c) + : + : "cc" + ); + + if (a == c) + return 0; /* CPUID not supported */ + + cpuid(0, max_std_level, ebx, ecx, edx); + + if(max_std_level >= 1){ + cpuid(1, eax, ebx, ecx, std_caps); + if (std_caps & (1<<23)) + rval |= MM_MMX; + if (std_caps & (1<<25)) + rval |= MM_MMXEXT | MM_SSE; + if (std_caps & (1<<26)) + rval |= MM_SSE2; + } + + cpuid(0x80000000, max_ext_level, ebx, ecx, edx); + + if(max_ext_level >= 0x80000001){ + cpuid(0x80000001, eax, ebx, ecx, ext_caps); + if (ext_caps & (1<<31)) + rval |= MM_3DNOW; + if (ext_caps & (1<<30)) + rval |= MM_3DNOWEXT; + if (ext_caps & (1<<23)) + rval |= MM_MMX; + } + + cpuid(0, eax, ebx, ecx, edx); + if ( ebx == 0x68747541 && + edx == 0x69746e65 && + ecx == 0x444d4163) { + /* AMD */ + if(ext_caps & (1<<22)) + rval |= MM_MMXEXT; + } else if (ebx == 0x746e6543 && + edx == 0x48727561 && + ecx == 0x736c7561) { /* "CentaurHauls" */ + /* VIA C3 */ + if(ext_caps & (1<<24)) + rval |= MM_MMXEXT; + } else if (ebx == 0x69727943 && + edx == 0x736e4978 && + ecx == 0x64616574) { + /* Cyrix Section */ + /* See if extended CPUID level 80000001 is supported */ + /* The value of CPUID/80000001 for the 6x86MX is undefined + according to the Cyrix CPU Detection Guide (Preliminary + Rev. 1.01 table 1), so we'll check the value of eax for + CPUID/0 to see if standard CPUID level 2 is supported. + According to the table, the only CPU which supports level + 2 is also the only one which supports extended CPUID levels. + */ + if (eax < 2) + return rval; + if (ext_caps & (1<<24)) + rval |= MM_MMXEXT; + } +#if 0 + av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", + (rval&MM_MMX) ? "MMX ":"", + (rval&MM_MMXEXT) ? "MMX2 ":"", + (rval&MM_SSE) ? 
"SSE ":"", + (rval&MM_SSE2) ? "SSE2 ":"", + (rval&MM_3DNOW) ? "3DNow ":"", + (rval&MM_3DNOWEXT) ? "3DNowExt ":""); +#endif + return rval; +} + +#ifdef __TEST__ +int main ( void ) +{ + int mm_flags; + mm_flags = mm_support(); + printf("mm_support = 0x%08X\n",mm_flags); + return 0; +} +#endif diff --git a/mpeg4/src/libavcodec/i386/dsputil_h264_template_mmx.c b/mpeg4/src/libavcodec/i386/dsputil_h264_template_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..b49c880a7382c613d017e3ab3bb18a60fad79344 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/dsputil_h264_template_mmx.c @@ -0,0 +1,324 @@ +/* + * Copyright (c) 2005 Zoltan Hidvegi , + * Loren Merritt + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * MMX optimized version of (put|avg)_h264_chroma_mc8. 
+ * H264_CHROMA_MC8_TMPL must be defined to the desired function name + * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg + * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function + */ +static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +{ + DECLARE_ALIGNED_8(uint64_t, AA); + DECLARE_ALIGNED_8(uint64_t, DD); + int i; + + if(y==0 && x==0) { + /* no filter needed */ + H264_CHROMA_MC8_MV0(dst, src, stride, h); + return; + } + + assert(x<8 && y<8 && x>=0 && y>=0); + + if(y==0 || x==0) + { + /* 1 dimensional filter only */ + const int dxy = x ? 1 : stride; + + asm volatile( + "movd %0, %%mm5\n\t" + "movq %1, %%mm4\n\t" + "punpcklwd %%mm5, %%mm5\n\t" + "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */ + "movq %%mm4, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */ + "psrlw $1, %%mm6\n\t" /* mm6 = 4 */ + :: "rm"(x+y), "m"(ff_pw_8)); + + for(i=0; i> 3 */ + "paddw %%mm6, %%mm0\n\t" + "paddw %%mm6, %%mm1\n\t" + "paddw %%mm2, %%mm0\n\t" + "paddw %%mm3, %%mm1\n\t" + "psrlw $3, %%mm0\n\t" + "psrlw $3, %%mm1\n\t" + "packuswb %%mm1, %%mm0\n\t" + H264_CHROMA_OP(%0, %%mm0) + "movq %%mm0, %0\n\t" + : "=m" (dst[0])); + + src += stride; + dst += stride; + } + return; + } + + /* general case, bilinear */ + asm volatile("movd %2, %%mm4\n\t" + "movd %3, %%mm6\n\t" + "punpcklwd %%mm4, %%mm4\n\t" + "punpcklwd %%mm6, %%mm6\n\t" + "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */ + "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */ + "movq %%mm4, %%mm5\n\t" + "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */ + "psllw $3, %%mm5\n\t" + "psllw $3, %%mm6\n\t" + "movq %%mm5, %%mm7\n\t" + "paddw %%mm6, %%mm7\n\t" + "movq %%mm4, %1\n\t" /* DD = x * y */ + "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */ + "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */ + "paddw %4, %%mm4\n\t" + "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */ + "pxor %%mm7, %%mm7\n\t" + "movq 
%%mm4, %0\n\t" + : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); + + asm volatile( + /* mm0 = src[0..7], mm1 = src[1..8] */ + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + : : "m" (src[0]), "m" (src[1])); + + for(i=0; i> 6 */ + "paddw %1, %%mm2\n\t" + "paddw %1, %%mm3\n\t" + "psrlw $6, %%mm2\n\t" + "psrlw $6, %%mm3\n\t" + "packuswb %%mm3, %%mm2\n\t" + H264_CHROMA_OP(%0, %%mm2) + "movq %%mm2, %0\n\t" + : "=m" (dst[0]) : "m" (ff_pw_32)); + dst+= stride; + } +} + +static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +{ + DECLARE_ALIGNED_8(uint64_t, AA); + DECLARE_ALIGNED_8(uint64_t, DD); + int i; + + /* no special case for mv=(0,0) in 4x*, since it's much less common than in 8x*. + * could still save a few cycles, but maybe not worth the complexity. */ + + assert(x<8 && y<8 && x>=0 && y>=0); + + asm volatile("movd %2, %%mm4\n\t" + "movd %3, %%mm6\n\t" + "punpcklwd %%mm4, %%mm4\n\t" + "punpcklwd %%mm6, %%mm6\n\t" + "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */ + "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */ + "movq %%mm4, %%mm5\n\t" + "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */ + "psllw $3, %%mm5\n\t" + "psllw $3, %%mm6\n\t" + "movq %%mm5, %%mm7\n\t" + "paddw %%mm6, %%mm7\n\t" + "movq %%mm4, %1\n\t" /* DD = x * y */ + "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */ + "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */ + "paddw %4, %%mm4\n\t" + "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */ + "pxor %%mm7, %%mm7\n\t" + "movq %%mm4, %0\n\t" + : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); + + asm volatile( + /* mm0 = src[0..3], mm1 = src[1..4] */ + "movd %0, %%mm0\n\t" + "movd %1, %%mm1\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + : : "m" (src[0]), "m" (src[1])); + + for(i=0; i> 6) */ + "paddw %1, %%mm2\n\t" + "psrlw $6, %%mm2\n\t" + "packuswb %%mm7, %%mm2\n\t" + H264_CHROMA_OP4(%0, %%mm2, %%mm3) + "movd %%mm2, %0\n\t" + : "=m" (dst[0]) : "m" 
(ff_pw_32)); + dst += stride; + } +} + +#ifdef H264_CHROMA_MC2_TMPL +static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +{ + int CD=((1<<16)-1)*x*y + 8*y; + int AB=((8<<16)-8)*x + 64 - CD; + int i; + + asm volatile( + /* mm5 = {A,B,A,B} */ + /* mm6 = {C,D,C,D} */ + "movd %0, %%mm5\n\t" + "movd %1, %%mm6\n\t" + "punpckldq %%mm5, %%mm5\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + :: "r"(AB), "r"(CD)); + + asm volatile( + /* mm0 = src[0,1,1,2] */ + "movd %0, %%mm0\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "pshufw $0x94, %%mm0, %%mm0\n\t" + :: "m"(src[0])); + + for(i=0; i> 6) */ + "paddw %1, %%mm1\n\t" + "psrlw $6, %%mm1\n\t" + "packssdw %%mm7, %%mm1\n\t" + "packuswb %%mm7, %%mm1\n\t" + /* writes garbage to the right of dst. + * ok because partitions are processed from left to right. */ + H264_CHROMA_OP4(%0, %%mm1, %%mm3) + "movd %%mm1, %0\n\t" + : "=m" (dst[0]) : "m" (ff_pw_32)); + dst += stride; + } +} +#endif + diff --git a/mpeg4/src/libavcodec/i386/dsputil_mmx.c b/mpeg4/src/libavcodec/i386/dsputil_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..2bef197ceaae88927a40383cd09fe5cb8ebaa619 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/dsputil_mmx.c @@ -0,0 +1,3154 @@ +/* + * MMX optimized DSP utils + * Copyright (c) 2000, 2001 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * MMX optimization by Nick Kurshev + */ + +#include "../dsputil.h" +#include "../simple_idct.h" +#include "../mpegvideo.h" +#include "mmx.h" + +//#undef NDEBUG +//#include + +extern const uint8_t ff_h263_loop_filter_strength[32]; +extern void ff_idct_xvid_mmx(short *block); +extern void ff_idct_xvid_mmx2(short *block); + +int mm_flags; /* multimedia extension flags */ + +/* pixel operations */ +static const uint64_t mm_bone attribute_used __attribute__ ((aligned(8))) = 0x0101010101010101ULL; +static const uint64_t mm_wone attribute_used __attribute__ ((aligned(8))) = 0x0001000100010001ULL; +static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x0002000200020002ULL; + +static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0014001400140014ULL; +static const uint64_t ff_pw_3 attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL; +static const uint64_t ff_pw_4 attribute_used __attribute__ ((aligned(8))) = 0x0004000400040004ULL; +static const uint64_t ff_pw_5 attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL; +static const uint64_t ff_pw_8 attribute_used __attribute__ ((aligned(8))) = 0x0008000800080008ULL; +static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL; +static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL; +static const uint64_t ff_pw_64 attribute_used __attribute__ ((aligned(8))) = 0x0040004000400040ULL; +static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; + +static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL; +static const uint64_t ff_pb_FC attribute_used __attribute__ 
((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; + +#define JUMPALIGN() __asm __volatile (".balign 8"::) +#define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) + +#define MOVQ_WONE(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ + "psrlw $15, %%" #regd ::) + +#define MOVQ_BFE(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ + "paddb %%" #regd ", %%" #regd " \n\t" ::) + +#ifndef PIC +#define MOVQ_BONE(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_bone)) +#define MOVQ_WTWO(regd) __asm __volatile ("movq %0, %%" #regd " \n\t" ::"m"(mm_wtwo)) +#else +// for shared library it's better to use this way for accessing constants +// pcmpeqd -> -1 +#define MOVQ_BONE(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ + "psrlw $15, %%" #regd " \n\t" \ + "packuswb %%" #regd ", %%" #regd " \n\t" ::) + +#define MOVQ_WTWO(regd) \ + __asm __volatile ( \ + "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ + "psrlw $15, %%" #regd " \n\t" \ + "psllw $1, %%" #regd " \n\t"::) + +#endif + +// using regr as temporary and for the output result +// first argument is unmodifed and second is trashed +// regfe is supposed to contain 0xfefefefefefefefe +#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \ + "movq " #rega ", " #regr " \n\t"\ + "pand " #regb ", " #regr " \n\t"\ + "pxor " #rega ", " #regb " \n\t"\ + "pand " #regfe "," #regb " \n\t"\ + "psrlq $1, " #regb " \n\t"\ + "paddb " #regb ", " #regr " \n\t" + +#define PAVGB_MMX(rega, regb, regr, regfe) \ + "movq " #rega ", " #regr " \n\t"\ + "por " #regb ", " #regr " \n\t"\ + "pxor " #rega ", " #regb " \n\t"\ + "pand " #regfe "," #regb " \n\t"\ + "psrlq $1, " #regb " \n\t"\ + "psubb " #regb ", " #regr " \n\t" + +// mm6 is supposed to contain 0xfefefefefefefefe +#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \ + "movq " #rega ", " #regr " \n\t"\ + "movq " #regc ", " #regp " \n\t"\ + "pand " #regb ", " #regr " \n\t"\ + "pand " #regd ", " 
#regp " \n\t"\ + "pxor " #rega ", " #regb " \n\t"\ + "pxor " #regc ", " #regd " \n\t"\ + "pand %%mm6, " #regb " \n\t"\ + "pand %%mm6, " #regd " \n\t"\ + "psrlq $1, " #regb " \n\t"\ + "psrlq $1, " #regd " \n\t"\ + "paddb " #regb ", " #regr " \n\t"\ + "paddb " #regd ", " #regp " \n\t" + +#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \ + "movq " #rega ", " #regr " \n\t"\ + "movq " #regc ", " #regp " \n\t"\ + "por " #regb ", " #regr " \n\t"\ + "por " #regd ", " #regp " \n\t"\ + "pxor " #rega ", " #regb " \n\t"\ + "pxor " #regc ", " #regd " \n\t"\ + "pand %%mm6, " #regb " \n\t"\ + "pand %%mm6, " #regd " \n\t"\ + "psrlq $1, " #regd " \n\t"\ + "psrlq $1, " #regb " \n\t"\ + "psubb " #regb ", " #regr " \n\t"\ + "psubb " #regd ", " #regp " \n\t" + +/***********************************/ +/* MMX no rounding */ +#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx +#define SET_RND MOVQ_WONE +#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) +#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) + +#include "dsputil_mmx_rnd.h" + +#undef DEF +#undef SET_RND +#undef PAVGBP +#undef PAVGB +/***********************************/ +/* MMX rounding */ + +#define DEF(x, y) x ## _ ## y ##_mmx +#define SET_RND MOVQ_WTWO +#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) +#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) + +#include "dsputil_mmx_rnd.h" + +#undef DEF +#undef SET_RND +#undef PAVGBP +#undef PAVGB + +/***********************************/ +/* 3Dnow specific */ + +#define DEF(x) x ## _3dnow +/* for Athlons PAVGUSB is prefered */ +#define PAVGB "pavgusb" + +#include "dsputil_mmx_avg.h" + +#undef DEF +#undef PAVGB + +/***********************************/ +/* MMX2 specific */ + +#define DEF(x) x ## _mmx2 + +/* Introduced only in MMX2 set */ +#define PAVGB "pavgb" + +#include "dsputil_mmx_avg.h" + +#undef DEF +#undef PAVGB + +/***********************************/ +/* standard MMX */ + +#ifdef CONFIG_ENCODERS +static void get_pixels_mmx(DCTELEM *block, 
const uint8_t *pixels, int line_size) +{ + asm volatile( + "mov $-128, %%"REG_a" \n\t" + "pxor %%mm7, %%mm7 \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%0), %%mm0 \n\t" + "movq (%0, %2), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "movq %%mm1, 8(%1, %%"REG_a") \n\t" + "movq %%mm2, 16(%1, %%"REG_a") \n\t" + "movq %%mm3, 24(%1, %%"REG_a") \n\t" + "add %3, %0 \n\t" + "add $32, %%"REG_a" \n\t" + "js 1b \n\t" + : "+r" (pixels) + : "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2) + : "%"REG_a + ); +} + +static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) +{ + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "mov $-128, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%0), %%mm0 \n\t" + "movq (%1), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "movq %%mm1, 8(%2, %%"REG_a") \n\t" + "add %3, %0 \n\t" + "add %3, %1 \n\t" + "add $16, %%"REG_a" \n\t" + "jnz 1b \n\t" + : "+r" (s1), "+r" (s2) + : "r" (block+64), "r" ((long)stride) + : "%"REG_a + ); +} +#endif //CONFIG_ENCODERS + +void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + const DCTELEM *p; + uint8_t *pix; + + /* read the pixels */ + p = block; + pix = pixels; + /* unrolled loop */ + __asm __volatile( + "movq %3, %%mm0 \n\t" + "movq 8%3, %%mm1 \n\t" + "movq 16%3, %%mm2 \n\t" + "movq 24%3, %%mm3 \n\t" + "movq 32%3, %%mm4 \n\t" + "movq 40%3, %%mm5 \n\t" + "movq 48%3, %%mm6 \n\t" + "movq 56%3, %%mm7 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + 
"packuswb %%mm7, %%mm6 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm2, (%0, %1) \n\t" + "movq %%mm4, (%0, %1, 2) \n\t" + "movq %%mm6, (%0, %2) \n\t" + ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p) + :"memory"); + pix += line_size*4; + p += 32; + + // if here would be an exact copy of the code above + // compiler would generate some very strange code + // thus using "r" + __asm __volatile( + "movq (%3), %%mm0 \n\t" + "movq 8(%3), %%mm1 \n\t" + "movq 16(%3), %%mm2 \n\t" + "movq 24(%3), %%mm3 \n\t" + "movq 32(%3), %%mm4 \n\t" + "movq 40(%3), %%mm5 \n\t" + "movq 48(%3), %%mm6 \n\t" + "movq 56(%3), %%mm7 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + "packuswb %%mm7, %%mm6 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm2, (%0, %1) \n\t" + "movq %%mm4, (%0, %1, 2) \n\t" + "movq %%mm6, (%0, %2) \n\t" + ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p) + :"memory"); +} + +static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = + { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; + +void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + int i; + + movq_m2r(*vector128, mm1); + for (i = 0; i < 8; i++) { + movq_m2r(*(block), mm0); + packsswb_m2r(*(block + 4), mm0); + block += 8; + paddb_r2r(mm1, mm0); + movq_r2m(mm0, *pixels); + pixels += line_size; + } +} + +void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) +{ + const DCTELEM *p; + uint8_t *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + MOVQ_ZERO(mm7); + i = 4; + do { + __asm __volatile( + "movq (%2), %%mm0 \n\t" + "movq 8(%2), %%mm1 \n\t" + "movq 16(%2), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "movq %0, %%mm4 \n\t" + "movq %1, %%mm6 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddsw %%mm4, %%mm0 \n\t" + "paddsw %%mm5, %%mm1 \n\t" + "movq %%mm6, %%mm5 \n\t" + "punpcklbw %%mm7, 
%%mm6 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddsw %%mm6, %%mm2 \n\t" + "paddsw %%mm5, %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "packuswb %%mm3, %%mm2 \n\t" + "movq %%mm0, %0 \n\t" + "movq %%mm2, %1 \n\t" + :"+m"(*pix), "+m"(*(pix+line_size)) + :"r"(p) + :"memory"); + pix += line_size*2; + p += 16; + } while (--i); +} + +static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%1, %3), %%mm1 \n\t" + "movd %%mm0, (%2) \n\t" + "movd %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%1, %3), %%mm1 \n\t" + "movd %%mm0, (%2) \n\t" + "movd %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + : "+g"(h), "+r" (pixels), "+r" (block) + : "r"((long)line_size) + : "%"REG_a, "memory" + ); +} + +static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + : "+g"(h), "+r" (pixels), "+r" (block) + : "r"((long)line_size) + : "%"REG_a, "memory" + ); +} + +static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm4 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq 8(%1, %3), %%mm5 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm4, 8(%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + 
"movq %%mm5, 8(%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm4 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq 8(%1, %3), %%mm5 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm4, 8(%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "movq %%mm5, 8(%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + : "+g"(h), "+r" (pixels), "+r" (block) + : "r"((long)line_size) + : "%"REG_a, "memory" + ); +} + +static void clear_blocks_mmx(DCTELEM *blocks) +{ + __asm __volatile( + "pxor %%mm7, %%mm7 \n\t" + "mov $-128*6, %%"REG_a" \n\t" + "1: \n\t" + "movq %%mm7, (%0, %%"REG_a") \n\t" + "movq %%mm7, 8(%0, %%"REG_a") \n\t" + "movq %%mm7, 16(%0, %%"REG_a") \n\t" + "movq %%mm7, 24(%0, %%"REG_a") \n\t" + "add $32, %%"REG_a" \n\t" + " js 1b \n\t" + : : "r" (((uint8_t *)blocks)+128*6) + : "%"REG_a + ); +} + +#ifdef CONFIG_ENCODERS +static int pix_sum16_mmx(uint8_t * pix, int line_size){ + const int h=16; + int sum; + long index= -line_size*h; + + __asm __volatile( + "pxor %%mm7, %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq (%2, %1), %%mm0 \n\t" + "movq (%2, %1), %%mm1 \n\t" + "movq 8(%2, %1), %%mm2 \n\t" + "movq 8(%2, %1), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "paddw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm6 \n\t" + "add %3, %1 \n\t" + " js 1b \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $32, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movq %%mm6, %%mm5 \n\t" + "psrlq $16, %%mm6 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "movd %%mm6, %0 \n\t" + "andl $0xFFFF, %0 \n\t" + : "=&r" (sum), "+r" (index) + : "r" (pix - index), "r" ((long)line_size) + ); + + return sum; +} +#endif //CONFIG_ENCODERS + +static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ + long i=0; + asm volatile( + "1: \n\t" + "movq (%1, %0), %%mm0 \n\t" + 
"movq (%2, %0), %%mm1 \n\t" + "paddb %%mm0, %%mm1 \n\t" + "movq %%mm1, (%2, %0) \n\t" + "movq 8(%1, %0), %%mm0 \n\t" + "movq 8(%2, %0), %%mm1 \n\t" + "paddb %%mm0, %%mm1 \n\t" + "movq %%mm1, 8(%2, %0) \n\t" + "add $16, %0 \n\t" + "cmp %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (i) + : "r"(src), "r"(dst), "r"((long)w-15) + ); + for(; idsp.sse[0](c, pix1, pix2, line_size, h); + else score1 = sse16_mmx(c, pix1, pix2, line_size, h); + score2= hf_noise16_mmx(pix1, line_size, h) - hf_noise16_mmx(pix2, line_size, h); + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int nsse8_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + MpegEncContext *c = p; + int score1= sse8_mmx(c, pix1, pix2, line_size, h); + int score2= hf_noise8_mmx(pix1, line_size, h) - hf_noise8_mmx(pix2, line_size, h); + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { + int tmp; + + assert( (((int)pix) & 7) == 0); + assert((line_size &7) ==0); + +#define SUM(in0, in1, out0, out1) \ + "movq (%0), %%mm2\n"\ + "movq 8(%0), %%mm3\n"\ + "add %2,%0\n"\ + "movq %%mm2, " #out0 "\n"\ + "movq %%mm3, " #out1 "\n"\ + "psubusb " #in0 ", %%mm2\n"\ + "psubusb " #in1 ", %%mm3\n"\ + "psubusb " #out0 ", " #in0 "\n"\ + "psubusb " #out1 ", " #in1 "\n"\ + "por %%mm2, " #in0 "\n"\ + "por %%mm3, " #in1 "\n"\ + "movq " #in0 ", %%mm2\n"\ + "movq " #in1 ", %%mm3\n"\ + "punpcklbw %%mm7, " #in0 "\n"\ + "punpcklbw %%mm7, " #in1 "\n"\ + "punpckhbw %%mm7, %%mm2\n"\ + "punpckhbw %%mm7, %%mm3\n"\ + "paddw " #in1 ", " #in0 "\n"\ + "paddw %%mm3, %%mm2\n"\ + "paddw %%mm2, " #in0 "\n"\ + "paddw " #in0 ", %%mm6\n" + + + asm volatile ( + "movl %3,%%ecx\n" + "pxor %%mm6,%%mm6\n" + "pxor %%mm7,%%mm7\n" + "movq (%0),%%mm0\n" + "movq 8(%0),%%mm1\n" + "add %2,%0\n" + "subl $2, %%ecx\n" + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + "1:\n" + 
+ SUM(%%mm4, %%mm5, %%mm0, %%mm1) + + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + + "subl $2, %%ecx\n" + "jnz 1b\n" + + "movq %%mm6,%%mm0\n" + "psrlq $32, %%mm6\n" + "paddw %%mm6,%%mm0\n" + "movq %%mm0,%%mm6\n" + "psrlq $16, %%mm0\n" + "paddw %%mm6,%%mm0\n" + "movd %%mm0,%1\n" + : "+r" (pix), "=r"(tmp) + : "r" ((long)line_size) , "m" (h) + : "%ecx"); + return tmp & 0xFFFF; +} +#undef SUM + +static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { + int tmp; + + assert( (((int)pix) & 7) == 0); + assert((line_size &7) ==0); + +#define SUM(in0, in1, out0, out1) \ + "movq (%0), " #out0 "\n"\ + "movq 8(%0), " #out1 "\n"\ + "add %2,%0\n"\ + "psadbw " #out0 ", " #in0 "\n"\ + "psadbw " #out1 ", " #in1 "\n"\ + "paddw " #in1 ", " #in0 "\n"\ + "paddw " #in0 ", %%mm6\n" + + asm volatile ( + "movl %3,%%ecx\n" + "pxor %%mm6,%%mm6\n" + "pxor %%mm7,%%mm7\n" + "movq (%0),%%mm0\n" + "movq 8(%0),%%mm1\n" + "add %2,%0\n" + "subl $2, %%ecx\n" + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + "1:\n" + + SUM(%%mm4, %%mm5, %%mm0, %%mm1) + + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + + "subl $2, %%ecx\n" + "jnz 1b\n" + + "movd %%mm6,%1\n" + : "+r" (pix), "=r"(tmp) + : "r" ((long)line_size) , "m" (h) + : "%ecx"); + return tmp; +} +#undef SUM + +static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + int tmp; + + assert( (((int)pix1) & 7) == 0); + assert( (((int)pix2) & 7) == 0); + assert((line_size &7) ==0); + +#define SUM(in0, in1, out0, out1) \ + "movq (%0),%%mm2\n"\ + "movq (%1)," #out0 "\n"\ + "movq 8(%0),%%mm3\n"\ + "movq 8(%1)," #out1 "\n"\ + "add %3,%0\n"\ + "add %3,%1\n"\ + "psubb " #out0 ", %%mm2\n"\ + "psubb " #out1 ", %%mm3\n"\ + "pxor %%mm7, %%mm2\n"\ + "pxor %%mm7, %%mm3\n"\ + "movq %%mm2, " #out0 "\n"\ + "movq %%mm3, " #out1 "\n"\ + "psubusb " #in0 ", %%mm2\n"\ + "psubusb " #in1 ", %%mm3\n"\ + "psubusb " #out0 ", " #in0 "\n"\ + "psubusb " #out1 ", " #in1 "\n"\ + "por %%mm2, " #in0 "\n"\ + "por %%mm3, " #in1 "\n"\ + "movq " #in0 ", %%mm2\n"\ 
+ "movq " #in1 ", %%mm3\n"\ + "punpcklbw %%mm7, " #in0 "\n"\ + "punpcklbw %%mm7, " #in1 "\n"\ + "punpckhbw %%mm7, %%mm2\n"\ + "punpckhbw %%mm7, %%mm3\n"\ + "paddw " #in1 ", " #in0 "\n"\ + "paddw %%mm3, %%mm2\n"\ + "paddw %%mm2, " #in0 "\n"\ + "paddw " #in0 ", %%mm6\n" + + + asm volatile ( + "movl %4,%%ecx\n" + "pxor %%mm6,%%mm6\n" + "pcmpeqw %%mm7,%%mm7\n" + "psllw $15, %%mm7\n" + "packsswb %%mm7, %%mm7\n" + "movq (%0),%%mm0\n" + "movq (%1),%%mm2\n" + "movq 8(%0),%%mm1\n" + "movq 8(%1),%%mm3\n" + "add %3,%0\n" + "add %3,%1\n" + "subl $2, %%ecx\n" + "psubb %%mm2, %%mm0\n" + "psubb %%mm3, %%mm1\n" + "pxor %%mm7, %%mm0\n" + "pxor %%mm7, %%mm1\n" + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + "1:\n" + + SUM(%%mm4, %%mm5, %%mm0, %%mm1) + + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + + "subl $2, %%ecx\n" + "jnz 1b\n" + + "movq %%mm6,%%mm0\n" + "psrlq $32, %%mm6\n" + "paddw %%mm6,%%mm0\n" + "movq %%mm0,%%mm6\n" + "psrlq $16, %%mm0\n" + "paddw %%mm6,%%mm0\n" + "movd %%mm0,%2\n" + : "+r" (pix1), "+r" (pix2), "=r"(tmp) + : "r" ((long)line_size) , "m" (h) + : "%ecx"); + return tmp & 0x7FFF; +} +#undef SUM + +static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + int tmp; + + assert( (((int)pix1) & 7) == 0); + assert( (((int)pix2) & 7) == 0); + assert((line_size &7) ==0); + +#define SUM(in0, in1, out0, out1) \ + "movq (%0)," #out0 "\n"\ + "movq (%1),%%mm2\n"\ + "movq 8(%0)," #out1 "\n"\ + "movq 8(%1),%%mm3\n"\ + "add %3,%0\n"\ + "add %3,%1\n"\ + "psubb %%mm2, " #out0 "\n"\ + "psubb %%mm3, " #out1 "\n"\ + "pxor %%mm7, " #out0 "\n"\ + "pxor %%mm7, " #out1 "\n"\ + "psadbw " #out0 ", " #in0 "\n"\ + "psadbw " #out1 ", " #in1 "\n"\ + "paddw " #in1 ", " #in0 "\n"\ + "paddw " #in0 ", %%mm6\n" + + asm volatile ( + "movl %4,%%ecx\n" + "pxor %%mm6,%%mm6\n" + "pcmpeqw %%mm7,%%mm7\n" + "psllw $15, %%mm7\n" + "packsswb %%mm7, %%mm7\n" + "movq (%0),%%mm0\n" + "movq (%1),%%mm2\n" + "movq 8(%0),%%mm1\n" + "movq 8(%1),%%mm3\n" + "add %3,%0\n" + "add %3,%1\n" + "subl $2, %%ecx\n" + 
"psubb %%mm2, %%mm0\n" + "psubb %%mm3, %%mm1\n" + "pxor %%mm7, %%mm0\n" + "pxor %%mm7, %%mm1\n" + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + "1:\n" + + SUM(%%mm4, %%mm5, %%mm0, %%mm1) + + SUM(%%mm0, %%mm1, %%mm4, %%mm5) + + "subl $2, %%ecx\n" + "jnz 1b\n" + + "movd %%mm6,%2\n" + : "+r" (pix1), "+r" (pix2), "=r"(tmp) + : "r" ((long)line_size) , "m" (h) + : "%ecx"); + return tmp; +} +#undef SUM + +static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ + long i=0; + asm volatile( + "1: \n\t" + "movq (%2, %0), %%mm0 \n\t" + "movq (%1, %0), %%mm1 \n\t" + "psubb %%mm0, %%mm1 \n\t" + "movq %%mm1, (%3, %0) \n\t" + "movq 8(%2, %0), %%mm0 \n\t" + "movq 8(%1, %0), %%mm1 \n\t" + "psubb %%mm0, %%mm1 \n\t" + "movq %%mm1, 8(%3, %0) \n\t" + "add $16, %0 \n\t" + "cmp %4, %0 \n\t" + " jb 1b \n\t" + : "+r" (i) + : "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15) + ); + for(; iput_ ## postfix1 = put_ ## postfix2;\ + c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ + c->avg_ ## postfix1 = avg_ ## postfix2; + +static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){ + const int w = 8; + const int ix = ox>>(16+shift); + const int iy = oy>>(16+shift); + const int oxs = ox>>4; + const int oys = oy>>4; + const int dxxs = dxx>>4; + const int dxys = dxy>>4; + const int dyxs = dyx>>4; + const int dyys = dyy>>4; + const uint16_t r4[4] = {r,r,r,r}; + const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys}; + const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys}; + const uint64_t shift2 = 2*shift; + uint8_t edge_buf[(h+1)*stride]; + int x, y; + + const int dxw = (dxx-(1<<(16+shift)))*(w-1); + const int dyh = (dyy-(1<<(16+shift)))*(h-1); + const int dxh = dxy*(h-1); + const int dyw = dyx*(w-1); + if( // non-constant fullpel offset (3% of blocks) + (ox^(ox+dxw) | ox^(ox+dxh) | ox^(ox+dxw+dxh) | + oy^(oy+dyw) | oy^(oy+dyh) | oy^(oy+dyw+dyh)) >> (16+shift) + // uses more than 16 bits of 
subpel mv (only at huge resolution) + || (dxx|dxy|dyx|dyy)&15 ) + { + //FIXME could still use mmx for some of the rows + ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height); + return; + } + + src += ix + iy*stride; + if( (unsigned)ix >= width-w || + (unsigned)iy >= height-h ) + { + ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height); + src = edge_buf; + } + + asm volatile( + "movd %0, %%mm6 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + :: "r"(1<>(BASIS_SHIFT - RECON_SHIFT); + } + } +} + +#define PREFETCH(name, op) \ +void name(void *mem, int stride, int h){\ + const uint8_t *p= mem;\ + do{\ + asm volatile(#op" %0" :: "m"(*p));\ + p+= stride;\ + }while(--h);\ +} +PREFETCH(prefetch_mmx2, prefetcht0) +PREFETCH(prefetch_3dnow, prefetch) +#undef PREFETCH + +#include "h264dsp_mmx.c" + +/* external functions, from idct_mmx.c */ +void ff_mmx_idct(DCTELEM *block); +void ff_mmxext_idct(DCTELEM *block); + +void ff_vp3_idct_sse2(int16_t *input_data); +void ff_vp3_idct_mmx(int16_t *data); +void ff_vp3_dsp_init_mmx(void); + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_sse2(block); + 
put_signed_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_sse2(block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_mmx(block); + put_signed_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_vp3_idct_mmx(block); + add_pixels_clamped_mmx(block, dest, line_size); +} +#ifdef CONFIG_GPL +static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_idct_xvid_mmx (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_idct_xvid_mmx (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_idct_xvid_mmx2 (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_idct_xvid_mmx2 (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +#endif + +#ifdef CONFIG_SNOW_ENCODER +extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); +extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); +extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); +extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); +extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); +extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int 
b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); +#endif + +void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) +{ + mm_flags = mm_support(); + + if (avctx->dsp_mask) { + if (avctx->dsp_mask & FF_MM_FORCE) + mm_flags |= (avctx->dsp_mask & 0xffff); + else + mm_flags &= ~(avctx->dsp_mask & 0xffff); + } + +#if 0 + av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); + if (mm_flags & MM_MMX) + av_log(avctx, AV_LOG_INFO, " mmx"); + if (mm_flags & MM_MMXEXT) + av_log(avctx, AV_LOG_INFO, " mmxext"); + if (mm_flags & MM_3DNOW) + av_log(avctx, AV_LOG_INFO, " 3dnow"); + if (mm_flags & MM_SSE) + av_log(avctx, AV_LOG_INFO, " sse"); + if (mm_flags & MM_SSE2) + av_log(avctx, AV_LOG_INFO, " sse2"); + av_log(avctx, AV_LOG_INFO, "\n"); +#endif + + if (mm_flags & MM_MMX) { + const int idct_algo= avctx->idct_algo; + +#ifdef CONFIG_ENCODERS + const int dct_algo = avctx->dct_algo; + if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ + if(mm_flags & MM_SSE2){ + c->fdct = ff_fdct_sse2; + }else if(mm_flags & MM_MMXEXT){ + c->fdct = ff_fdct_mmx2; + }else{ + c->fdct = ff_fdct_mmx; + } + } +#endif //CONFIG_ENCODERS + if(avctx->lowres==0){ + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + c->idct_put= ff_simple_idct_put_mmx; + c->idct_add= ff_simple_idct_add_mmx; + c->idct = ff_simple_idct_mmx; + c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_libmpeg2mmx2_idct_put; + c->idct_add= ff_libmpeg2mmx2_idct_add; + c->idct = ff_mmxext_idct; + }else{ + c->idct_put= ff_libmpeg2mmx_idct_put; + c->idct_add= ff_libmpeg2mmx_idct_add; + c->idct = ff_mmx_idct; + } + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else if(idct_algo==FF_IDCT_VP3){ + if(mm_flags & MM_SSE2){ + c->idct_put= ff_vp3_idct_put_sse2; + c->idct_add= ff_vp3_idct_add_sse2; + c->idct = ff_vp3_idct_sse2; + c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; + }else{ + 
ff_vp3_dsp_init_mmx(); + c->idct_put= ff_vp3_idct_put_mmx; + c->idct_add= ff_vp3_idct_add_mmx; + c->idct = ff_vp3_idct_mmx; + c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; + } +#ifdef CONFIG_GPL + }else if(idct_algo==FF_IDCT_XVIDMMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_idct_xvid_mmx2_put; + c->idct_add= ff_idct_xvid_mmx2_add; + c->idct = ff_idct_xvid_mmx2; + }else{ + c->idct_put= ff_idct_xvid_mmx_put; + c->idct_add= ff_idct_xvid_mmx_add; + c->idct = ff_idct_xvid_mmx; + } +#endif + } + } + +#ifdef CONFIG_ENCODERS + c->get_pixels = get_pixels_mmx; + c->diff_pixels = diff_pixels_mmx; +#endif //CONFIG_ENCODERS + c->put_pixels_clamped = put_pixels_clamped_mmx; + c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; + c->add_pixels_clamped = add_pixels_clamped_mmx; + c->clear_blocks = clear_blocks_mmx; +#ifdef CONFIG_ENCODERS + c->pix_sum = pix_sum16_mmx; +#endif //CONFIG_ENCODERS + + c->put_pixels_tab[0][0] = put_pixels16_mmx; + c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; + c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; + c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; + + c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; + c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; + + c->avg_pixels_tab[0][0] = avg_pixels16_mmx; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; + + c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; + c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; + c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; + c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; + + c->put_pixels_tab[1][0] = put_pixels8_mmx; + c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; + c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; + c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; + + 
c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; + c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; + + c->avg_pixels_tab[1][0] = avg_pixels8_mmx; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; + + c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; + c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; + c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; + c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; + + c->gmc= gmc_mmx; + + c->add_bytes= add_bytes_mmx; +#ifdef CONFIG_ENCODERS + c->diff_bytes= diff_bytes_mmx; + + c->hadamard8_diff[0]= hadamard8_diff16_mmx; + c->hadamard8_diff[1]= hadamard8_diff_mmx; + + c->pix_norm1 = pix_norm1_mmx; + c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx; + c->sse[1] = sse8_mmx; + c->vsad[4]= vsad_intra16_mmx; + + c->nsse[0] = nsse16_mmx; + c->nsse[1] = nsse8_mmx; + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->vsad[0] = vsad16_mmx; + } + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->try_8x8basis= try_8x8basis_mmx; + } + c->add_8x8basis= add_8x8basis_mmx; + +#endif //CONFIG_ENCODERS + + c->h263_v_loop_filter= h263_v_loop_filter_mmx; + c->h263_h_loop_filter= h263_h_loop_filter_mmx; + c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; + c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; + + c->h264_idct_dc_add= + c->h264_idct_add= ff_h264_idct_add_mmx; + c->h264_idct8_dc_add= + c->h264_idct8_add= ff_h264_idct8_add_mmx; + + if (mm_flags & MM_MMXEXT) { + c->prefetch = prefetch_mmx2; + + c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; + c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; + + c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; + + 
c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; + c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; + + c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + +#ifdef CONFIG_ENCODERS + c->hadamard8_diff[0]= hadamard8_diff16_mmx2; + c->hadamard8_diff[1]= hadamard8_diff_mmx2; + c->vsad[4]= vsad_intra16_mmx2; +#endif //CONFIG_ENCODERS + + c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; +#ifdef CONFIG_ENCODERS + c->vsad[0] = vsad16_mmx2; +#endif //CONFIG_ENCODERS + } + +#if 1 + SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_mmx2) 
+ SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) + SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) +#endif + +//FIXME 3dnow too +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_mmx2; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_mmx2; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_mmx2; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_mmx2; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_mmx2; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_mmx2; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_mmx2 + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + 
dspfunc(put_h264_qpel, 2, 4); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 2, 4); +#undef dspfunc + + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2; + c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; + c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; + c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; + c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; + c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; + c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; + c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; + c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; + c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; + + c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; + c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; + c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; + c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; + c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; + c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; + c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; + c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; + + c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; + c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; + c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; + c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; + c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; + c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; + c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; + c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; + +#ifdef CONFIG_ENCODERS + c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; +#endif //CONFIG_ENCODERS + } else if (mm_flags & MM_3DNOW) { + c->prefetch = prefetch_3dnow; + + c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; + 
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; + + c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; + + c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; + c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; + + c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + } + + SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][11], qpel16_mc32_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][12], qpel16_mc03_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][13], qpel16_mc13_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][14], qpel16_mc23_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[0][15], qpel16_mc33_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 0], qpel8_mc00_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 1], qpel8_mc10_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 2], qpel8_mc20_3dnow) + 
SET_QPEL_FUNC(qpel_pixels_tab[1][ 3], qpel8_mc30_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 4], qpel8_mc01_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 5], qpel8_mc11_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 6], qpel8_mc21_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 7], qpel8_mc31_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 8], qpel8_mc02_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][ 9], qpel8_mc12_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][10], qpel8_mc22_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][11], qpel8_mc32_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][12], qpel8_mc03_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) + SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_3dnow; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_3dnow; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_3dnow; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_3dnow; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_3dnow; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_3dnow; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_3dnow + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + dspfunc(put_h264_qpel, 2, 4); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 
2, 4); + + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; + c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; + } + +#ifdef CONFIG_SNOW_ENCODER + if(mm_flags & MM_SSE2){ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; + c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; + c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; + } + else{ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; + c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; + c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; + } +#endif + } + +#ifdef CONFIG_ENCODERS + dsputil_init_pix_mmx(c, avctx); +#endif //CONFIG_ENCODERS +#if 0 + // for speed testing + get_pixels = just_return; + put_pixels_clamped = just_return; + add_pixels_clamped = just_return; + + pix_abs16x16 = just_return; + pix_abs16x16_x2 = just_return; + pix_abs16x16_y2 = just_return; + pix_abs16x16_xy2 = just_return; + + put_pixels_tab[0] = just_return; + put_pixels_tab[1] = just_return; + put_pixels_tab[2] = just_return; + put_pixels_tab[3] = just_return; + + put_no_rnd_pixels_tab[0] = just_return; + put_no_rnd_pixels_tab[1] = just_return; + put_no_rnd_pixels_tab[2] = just_return; + put_no_rnd_pixels_tab[3] = just_return; + + avg_pixels_tab[0] = just_return; + avg_pixels_tab[1] = just_return; + avg_pixels_tab[2] = just_return; + avg_pixels_tab[3] = just_return; + + avg_no_rnd_pixels_tab[0] = just_return; + avg_no_rnd_pixels_tab[1] = just_return; + avg_no_rnd_pixels_tab[2] = just_return; + avg_no_rnd_pixels_tab[3] = just_return; + + //av_fdct = just_return; + //ff_idct = just_return; +#endif +} diff --git a/mpeg4/src/libavcodec/i386/dsputil_mmx_avg.h b/mpeg4/src/libavcodec/i386/dsputil_mmx_avg.h new file mode 100644 index 0000000000000000000000000000000000000000..440c5bb9c0a532e7394d64aae287f2a26fc9fadc --- /dev/null +++ b/mpeg4/src/libavcodec/i386/dsputil_mmx_avg.h @@ -0,0 +1,820 @@ +/* + * DSP utils : average functions are compiled twice for 3dnow/mmx2 + * 
Copyright (c) 2000, 2001 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * MMX optimization by Nick Kurshev + * mostly rewritten by Michael Niedermayer + * and improved by Zdenek Kabelac + */ + +/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm + clobber bug - now it will work with 2.95.2 and also with -fPIC + */ +/* put_pixels8_x2: for each row, loads 8 bytes at pixels, combines them with the 8 bytes at pixels+1 through PAVGB and stores the result to block. PAVGB, REG_a and DEF are macros supplied by the including file (PAVGB is presumably the packed byte-average instruction of the target ISA -- TODO confirm). REG_a holds 2*line_size and is used to step both pointers two rows at a time. One loop pass handles 4 rows (subl $4 / jnz), so the loop only terminates when h reaches exactly 0. */ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + "add %%"REG_a", %1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl 
$1, %0 \n\t" + " jz 1f \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $4, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "movd (%2), %%mm2 \n\t" + "movd 4(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "movd 8(%2), %%mm2 \n\t" + "movd 12(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $16, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + + +static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 \n\t" + PAVGB" 24(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add 
$32, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "pcmpeqb %%mm6, %%mm6 \n\t" + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq 16(%2), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 
3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $4, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 4(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 8(%2), %%mm0 \n\t" + PAVGB" 12(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $16, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + + +static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" (%3), %%mm0 
\n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 \n\t" + PAVGB" 24(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... 
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 8(%1, %3), %%mm3 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + PAVGB" 9(%1), %%mm2 \n\t" + PAVGB" 9(%1, %3), %%mm3 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "movq %%mm2, 8(%2) \n\t" + "movq %%mm3, 8(%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 8(%1, %3), %%mm3 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + PAVGB" 9(%1), %%mm2 \n\t" + PAVGB" 9(%1, %3), %%mm3 \n\t" + "add %%"REG_a", %1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "movq %%mm2, 8(%2) \n\t" + "movq %%mm3, 8(%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 \n\t" + PAVGB" 24(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq 
%%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 \n\t" + PAVGB" 24(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... 
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "pcmpeqb %%mm6, %%mm6 \n\t" + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq 16(%2), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc 
... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +/* GL: this function does incorrect rounding if overflow */ +/* put_no_rnd_pixels8_x2: 8-byte-wide put that combines each source byte with its right-hand neighbour (pixels+1). Before PAVGB, mm6 is subtracted with unsigned saturation (psubusb) from the left-hand sample only; that saturation is what the GL note above refers to. MOVQ_BONE is defined outside this view; presumably it fills mm6 with 0x01 bytes -- TODO confirm. REG_a holds 2*line_size. One loop pass handles 4 rows and the loop ends when h reaches exactly 0. */ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BONE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + "add %%"REG_a", %1 \n\t" + "psubusb %%mm6, %%mm0 \n\t" + "psubusb %%mm6, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm2, (%2, %3) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + "psubusb %%mm6, %%mm0 \n\t" + "psubusb %%mm6, %%mm2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm2, (%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +/* put_pixels8_y2: 8-byte-wide put that combines each source row with the row below it through PAVGB. block is first biased by -line_size (sub %3, %2) so that the stores at (%2, %3) and (%2, REG_a) land on the current and the next output row. The last row loaded in a pass stays in a register and becomes the top row of the next pass, so h+1 source rows are read to produce h output rows. One loop pass handles 4 rows and the loop ends when h reaches exactly 0. */ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + "sub %3, %2 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq %%mm0, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq %%mm2, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D" (block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +/* GL: 
this function does incorrect rounding if overflow */ +/* put_no_rnd_pixels8_y2: 8-byte-wide put that combines each source row with the row below it through PAVGB. mm6 is subtracted with unsigned saturation (psubusb) from every second source row (the one held in mm1) before that row is used in the two PAVGB steps around it. MOVQ_BONE is defined outside this view; presumably it fills mm6 with 0x01 bytes -- TODO confirm. block is biased by -line_size (sub %3, %2) so the stores at (%2, %3) and (%2, REG_a) hit the current and next output row. One loop pass handles 4 rows and the loop ends when h reaches exactly 0. */ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BONE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + "sub %3, %2 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" + "psubusb %%mm6, %%mm1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq %%mm0, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + "psubusb %%mm6, %%mm1 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "movq %%mm2, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D" (block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +/* avg_pixels8: read-modify-write on block, 8 bytes wide. Each row of block is loaded, combined through PAVGB with the matching row of pixels, and written back to block. REG_a holds 2*line_size. One loop pass handles 4 rows and the loop ends when h reaches exactly 0. */ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "1: \n\t" + "movq (%2), %%mm0 \n\t" + "movq (%2, %3), %%mm1 \n\t" + PAVGB" (%1), %%mm0 \n\t" + PAVGB" (%1, %3), %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%2), %%mm0 \n\t" + "movq (%2, %3), %%mm1 \n\t" + PAVGB" (%1), %%mm0 \n\t" + PAVGB" (%1, %3), %%mm1 \n\t" + "add %%"REG_a", %1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +/* avg_pixels8_x2: combines the 8 bytes at pixels with the 8 bytes at pixels+1 through PAVGB, then combines that result with the bytes already in block (a second PAVGB against (%2)) before storing it back to block. */ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm2 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm2 \n\t" + PAVGB" (%2), %%mm0 
\n\t" + PAVGB" (%2, %3), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm2, (%2, %3) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1, %3), %%mm2 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm2 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" (%2, %3), %%mm2 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm2, (%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + "sub %3, %2 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + "movq (%2, %3), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" + PAVGB" %%mm3, %%mm0 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm0, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + "movq (%2, %3), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" + PAVGB" %%mm3, %%mm2 \n\t" + PAVGB" %%mm4, %%mm1 \n\t" + "movq %%mm2, (%2, %3) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter +static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BONE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + PAVGB" 1(%1), %%mm0 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq 
(%1, %%"REG_a"), %%mm2 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "psubusb %%mm6, %%mm2 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" %%mm2, %%mm1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" (%2, %3), %%mm1 \n\t" + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + PAVGB" 1(%1, %3), %%mm1 \n\t" + PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" + PAVGB" %%mm1, %%mm2 \n\t" + PAVGB" %%mm0, %%mm1 \n\t" + PAVGB" (%2), %%mm2 \n\t" + PAVGB" (%2, %3), %%mm1 \n\t" + "movq %%mm2, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +//FIXME the following could be optimized too ... +/* put_no_rnd_pixels16_x2: 16-byte-wide variant built from two calls to the 8-wide DEF(put_no_rnd_pixels8_x2): one on block/pixels for the left 8 columns, one on block+8/pixels+8 for the right 8 columns, with the same line_size and h. */ static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h); + DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h); +} +/* put_pixels16_y2: 16-byte-wide variant; calls the 8-wide DEF(put_pixels8_y2) for the left 8 columns and again at block+8/pixels+8 for the right 8 columns. */ static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(put_pixels8_y2)(block , pixels , line_size, h); + DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h); +} +/* put_no_rnd_pixels16_y2: 16-byte-wide variant; calls the 8-wide DEF(put_no_rnd_pixels8_y2) for the left 8 columns and again at block+8/pixels+8 for the right 8 columns. */ static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h); + DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h); +} +/* avg_pixels16: 16-byte-wide variant; calls the 8-wide DEF(avg_pixels8) for the left 8 columns and again at block+8/pixels+8 for the right 8 columns. */ static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg_pixels8)(block , pixels , line_size, h); + DEF(avg_pixels8)(block+8, pixels+8, line_size, h); +} +/* avg_pixels16_x2: 16-byte-wide variant; calls the 8-wide DEF(avg_pixels8_x2) for the left 8 columns and again at block+8/pixels+8 for the right 8 columns. */ static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg_pixels8_x2)(block , pixels , line_size, h); + DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h); 
+} +/* avg_pixels16_y2: 16-byte-wide variant; calls the 8-wide DEF(avg_pixels8_y2) on block/pixels for the left 8 columns and again at block+8/pixels+8 for the right 8 columns, with the same line_size and h. */ static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg_pixels8_y2)(block , pixels , line_size, h); + DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h); +} +/* avg_pixels16_xy2: 16-byte-wide variant; calls the 8-wide DEF(avg_pixels8_xy2) on block/pixels for the left 8 columns and again at block+8/pixels+8 for the right 8 columns, with the same line_size and h. */ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg_pixels8_xy2)(block , pixels , line_size, h); + DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h); +} + diff --git a/mpeg4/src/libavcodec/i386/dsputil_mmx_rnd.h b/mpeg4/src/libavcodec/i386/dsputil_mmx_rnd.h new file mode 100644 index 0000000000000000000000000000000000000000..3ecd776b824f9f05d719e67dd208219386ccc953 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/dsputil_mmx_rnd.h @@ -0,0 +1,590 @@ +/* + * DSP utils mmx functions are compiled twice for rnd/no_rnd + * Copyright (c) 2000, 2001 Fabrice Bellard. + * Copyright (c) 2003-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * MMX optimization by Nick Kurshev + * mostly rewritten by Michael Niedermayer + * and improved by Zdenek Kabelac + */ + +// put_pixels +static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((long)line_size) + :REG_a, "memory"); +} + +static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) + "movq %%mm4, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm5, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 
16(%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + "add $32, %2 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm5, (%3) \n\t" + "add %5, %3 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + +static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 9(%1), %%mm1 \n\t" + "movq 8(%1, %3), %%mm2 \n\t" + "movq 9(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, 8(%2) \n\t" + "movq %%mm5, 8(%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm1 \n\t" + "movq (%1, %3), %%mm2 \n\t" + "movq 1(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 9(%1), %%mm1 \n\t" + "movq 8(%1, %3), %%mm2 \n\t" + "movq 9(%1, %3), %%mm3 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, 8(%2) \n\t" + "movq %%mm5, 8(%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((long)line_size) + :REG_a, "memory"); +} + +static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t 
*src2, int dstStride, int src1Stride, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "movq %%mm5, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "movq %%mm5, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 16(%2), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) + "movq %%mm4, (%3) \n\t" + "movq %%mm5, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + +static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"),%%mm2 \n\t" + PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"),%%mm0 \n\t" + PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) + "movq %%mm4, (%2) \n\t" + "movq %%mm5, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), 
"+S"(pixels), "+D"(block) + :"r"((long)line_size) + :REG_a, "memory"); +} + +static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_ZERO(mm7); + SET_RND(mm6); // =2 for rnd and =1 for no_rnd version + __asm __volatile( + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" + "add %3, %1 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddusw %%mm2, %%mm0 \n\t" + "paddusw %%mm3, %%mm1 \n\t" + "paddusw %%mm6, %%mm4 \n\t" + "paddusw %%mm6, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "psrlw $2, %%mm4 \n\t" + "psrlw $2, %%mm5 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + "movq %%mm4, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" + + "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"REG_a"), %%mm4 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm2, %%mm4 \n\t" + "paddusw %%mm3, %%mm5 \n\t" + "paddusw %%mm6, %%mm0 \n\t" + "paddusw %%mm6, %%mm1 \n\t" + "paddusw %%mm4, %%mm0 \n\t" + "paddusw %%mm5, %%mm1 \n\t" + "psrlw $2, %%mm0 \n\t" + "psrlw $2, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" + + "subl $2, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels) + :"D"(block), "r"((long)line_size) + :REG_a, "memory"); +} + +// avg_pixels +static void 
attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movd %0, %%mm0 \n\t" + "movd %1, %%mm1 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + "movd %%mm2, %0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } + while (--h); +} + +// in case more speed is needed - unroling would certainly help +static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %0, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + "movq %%mm2, %0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } + while (--h); +} + +static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %0, %%mm0 \n\t" + "movq %1, %%mm1 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + "movq %%mm2, %0 \n\t" + "movq 8%0, %%mm0 \n\t" + "movq 8%1, %%mm1 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + "movq %%mm2, 8%0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } + while (--h); +} + +static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %1, %%mm0 \n\t" + "movq 1%1, %%mm1 \n\t" + "movq %0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, %0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } while (--h); +} + +static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %1, %%mm0 \n\t" + "movq %2, %%mm1 
\n\t" + "movq %0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, %0 \n\t" + :"+m"(*dst) + :"m"(*src1), "m"(*src2) + :"memory"); + dst += dstStride; + src1 += src1Stride; + src2 += 8; + } while (--h); +} + +static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %1, %%mm0 \n\t" + "movq 1%1, %%mm1 \n\t" + "movq %0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, %0 \n\t" + "movq 8%1, %%mm0 \n\t" + "movq 9%1, %%mm1 \n\t" + "movq 8%0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, 8%0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } while (--h); +} + +static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movq %1, %%mm0 \n\t" + "movq %2, %%mm1 \n\t" + "movq %0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, %0 \n\t" + "movq 8%1, %%mm0 \n\t" + "movq 8%2, %%mm1 \n\t" + "movq 8%0, %%mm3 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + PAVGB(%%mm3, %%mm2, %%mm0, %%mm6) + "movq %%mm0, 8%0 \n\t" + :"+m"(*dst) + :"m"(*src1), "m"(*src2) + :"memory"); + dst += dstStride; + src1 += src1Stride; + src2 += 16; + } while (--h); +} + +static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + "movq (%1), %%mm0 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) + "movq (%2), %%mm3 \n\t" + PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) + "movq (%2, %3), %%mm3 \n\t" + PAVGB(%%mm3, 
%%mm5, %%mm1, %%mm6) + "movq %%mm0, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + + "movq (%1, %3), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) + "movq (%2), %%mm3 \n\t" + PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) + "movq (%2, %3), %%mm3 \n\t" + PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) + "movq %%mm2, (%2) \n\t" + "movq %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + + "subl $4, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r"((long)line_size) + :REG_a, "memory"); +} + +// this routine is 'slightly' suboptimal but mostly unused +static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_ZERO(mm7); + SET_RND(mm6); // =2 for rnd and =1 for no_rnd version + __asm __volatile( + "movq (%1), %%mm0 \n\t" + "movq 1(%1), %%mm4 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" + "add %3, %1 \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddusw %%mm2, %%mm0 \n\t" + "paddusw %%mm3, %%mm1 \n\t" + "paddusw %%mm6, %%mm4 \n\t" + "paddusw %%mm6, %%mm5 \n\t" + "paddusw %%mm0, %%mm4 \n\t" + "paddusw %%mm1, %%mm5 \n\t" + "psrlw $2, %%mm4 \n\t" + "psrlw $2, %%mm5 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "packuswb %%mm5, %%mm4 \n\t" + "pcmpeqd %%mm2, %%mm2 \n\t" + "paddb %%mm2, %%mm2 \n\t" + PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) + "movq %%mm5, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" + + "movq (%1, %%"REG_a"), %%mm2 
\n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"REG_a"), %%mm4 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm5 \n\t" + "paddusw %%mm2, %%mm4 \n\t" + "paddusw %%mm3, %%mm5 \n\t" + "paddusw %%mm6, %%mm0 \n\t" + "paddusw %%mm6, %%mm1 \n\t" + "paddusw %%mm4, %%mm0 \n\t" + "paddusw %%mm5, %%mm1 \n\t" + "psrlw $2, %%mm0 \n\t" + "psrlw $2, %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "pcmpeqd %%mm2, %%mm2 \n\t" + "paddb %%mm2, %%mm2 \n\t" + PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" + + "subl $2, %0 \n\t" + "jnz 1b \n\t" + :"+g"(h), "+S"(pixels) + :"D"(block), "r"((long)line_size) + :REG_a, "memory"); +} + +//FIXME optimize +static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(put, pixels8_y2)(block , pixels , line_size, h); + DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); +} + +static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(put, pixels8_xy2)(block , pixels , line_size, h); + DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); +} + +static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg, pixels8_y2)(block , pixels , line_size, h); + DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); +} + +static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ + DEF(avg, pixels8_xy2)(block , pixels , line_size, h); + DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); +} + + diff --git a/mpeg4/src/libavcodec/i386/fdct_mmx.c b/mpeg4/src/libavcodec/i386/fdct_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..f6150c83c999989a1ca1f74ffe70431ebb283387 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/fdct_mmx.c @@ -0,0 +1,562 @@ +/* + * MMX 
optimized forward DCT + * The gcc porting is Copyright (c) 2001 Fabrice Bellard. + * cleanup/optimizations are Copyright (c) 2002-2004 Michael Niedermayer + * SSE2 optimization is Copyright (c) 2004 Denes Balatoni. + * + * from fdctam32.c - AP922 MMX(3D-Now) forward-DCT + * + * Intel Application Note AP-922 - fast, precise implementation of DCT + * http://developer.intel.com/vtune/cbts/appnotes.htm + * + * Also of inspiration: + * a page about fdct at http://www.geocities.com/ssavekar/dct.htm + * Skal's fdct at http://skal.planet-d.net/coding/dct.html + */ +#include "common.h" +#include "../dsputil.h" +#include "mmx.h" + +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align))) + +////////////////////////////////////////////////////////////////////// +// +// constants for the forward DCT +// ----------------------------- +// +// Be sure to check that your compiler is aligning all constants to QWORD +// (8-byte) memory boundaries! Otherwise the unaligned memory access will +// severely stall MMX execution. 
+// +////////////////////////////////////////////////////////////////////// + +#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy +#define SHIFT_FRW_COL BITS_FRW_ACC +#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) +#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) +//#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) + +//concatenated table, for forward DCT transformation +static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = { + 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 + 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 + -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 +}; + +static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { + 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 +}; + +static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; + +static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; + +struct +{ + const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16); +} fdct_r_row_sse2 ATTR_ALIGN(16)= +{{ + RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW +}}; +//static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW}; + +static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = { // forward_dct coeff table + 16384, 16384, 22725, 19266, + 16384, 16384, 12873, 4520, + 21407, 8867, 19266, -4520, + -8867, -21407, -22725, -12873, + 16384, -16384, 12873, -22725, + -16384, 16384, 4520, 19266, + 8867, -21407, 4520, -12873, + 21407, -8867, 19266, -22725, + + 22725, 22725, 31521, 26722, + 22725, 22725, 17855, 6270, + 29692, 12299, 26722, -6270, + -12299, -29692, -31521, -17855, + 22725, -22725, 17855, -31521, + -22725, 22725, 6270, 26722, + 12299, -29692, 6270, -17855, + 29692, -12299, 26722, -31521, + + 21407, 21407, 29692, 25172, + 21407, 21407, 16819, 5906, + 27969, 11585, 25172, -5906, + -11585, -27969, -29692, -16819, + 21407, -21407, 16819, -29692, + -21407, 21407, 5906, 25172, + 11585, -27969, 5906, -16819, + 27969, -11585, 25172, -29692, + + 19266, 19266, 26722, 22654, + 19266, 
19266, 15137, 5315, + 25172, 10426, 22654, -5315, + -10426, -25172, -26722, -15137, + 19266, -19266, 15137, -26722, + -19266, 19266, 5315, 22654, + 10426, -25172, 5315, -15137, + 25172, -10426, 22654, -26722, + + 16384, 16384, 22725, 19266, + 16384, 16384, 12873, 4520, + 21407, 8867, 19266, -4520, + -8867, -21407, -22725, -12873, + 16384, -16384, 12873, -22725, + -16384, 16384, 4520, 19266, + 8867, -21407, 4520, -12873, + 21407, -8867, 19266, -22725, + + 19266, 19266, 26722, 22654, + 19266, 19266, 15137, 5315, + 25172, 10426, 22654, -5315, + -10426, -25172, -26722, -15137, + 19266, -19266, 15137, -26722, + -19266, 19266, 5315, 22654, + 10426, -25172, 5315, -15137, + 25172, -10426, 22654, -26722, + + 21407, 21407, 29692, 25172, + 21407, 21407, 16819, 5906, + 27969, 11585, 25172, -5906, + -11585, -27969, -29692, -16819, + 21407, -21407, 16819, -29692, + -21407, 21407, 5906, 25172, + 11585, -27969, 5906, -16819, + 27969, -11585, 25172, -29692, + + 22725, 22725, 31521, 26722, + 22725, 22725, 17855, 6270, + 29692, 12299, 26722, -6270, + -12299, -29692, -31521, -17855, + 22725, -22725, 17855, -31521, + -22725, 22725, 6270, 26722, + 12299, -29692, 6270, -17855, + 29692, -12299, 26722, -31521, +}; + +struct +{ + const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16); +} tab_frw_01234567_sse2 ATTR_ALIGN(16) = +{{ +//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = { // forward_dct coeff table +#define TABLE_SSE2 C4, C4, C1, C3, -C6, -C2, -C1, -C5, \ + C4, C4, C5, C7, C2, C6, C3, -C7, \ + -C4, C4, C7, C3, C6, -C2, C7, -C5, \ + C4, -C4, C5, -C1, C2, -C6, C3, -C1, +// c1..c7 * cos(pi/4) * 2^15 +#define C1 22725 +#define C2 21407 +#define C3 19266 +#define C4 16384 +#define C5 12873 +#define C6 8867 +#define C7 4520 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 31521 +#define C2 29692 +#define C3 26722 +#define C4 22725 +#define C5 17855 +#define C6 12299 +#define C7 6270 +TABLE_SSE2 + +#undef C1 +#undef C2 
+#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 29692 +#define C2 27969 +#define C3 25172 +#define C4 21407 +#define C5 16819 +#define C6 11585 +#define C7 5906 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 26722 +#define C2 25172 +#define C3 22654 +#define C4 19266 +#define C5 15137 +#define C6 10426 +#define C7 5315 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 22725 +#define C2 21407 +#define C3 19266 +#define C4 16384 +#define C5 12873 +#define C6 8867 +#define C7 4520 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 26722 +#define C2 25172 +#define C3 22654 +#define C4 19266 +#define C5 15137 +#define C6 10426 +#define C7 5315 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 29692 +#define C2 27969 +#define C3 25172 +#define C4 21407 +#define C5 16819 +#define C6 11585 +#define C7 5906 +TABLE_SSE2 + +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +#define C1 31521 +#define C2 29692 +#define C3 26722 +#define C4 22725 +#define C5 17855 +#define C6 12299 +#define C7 6270 +TABLE_SSE2 +}}; + + +static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset) +{ + movq_m2r(*(in + offset + 1 * 8), mm0); + movq_m2r(*(in + offset + 6 * 8), mm1); + movq_r2r(mm0, mm2); + movq_m2r(*(in + offset + 2 * 8), mm3); + paddsw_r2r(mm1, mm0); + movq_m2r(*(in + offset + 5 * 8), mm4); + psllw_i2r(SHIFT_FRW_COL, mm0); + movq_m2r(*(in + offset + 0 * 8), mm5); + paddsw_r2r(mm3, mm4); + paddsw_m2r(*(in + offset + 7 * 8), mm5); + psllw_i2r(SHIFT_FRW_COL, mm4); + movq_r2r(mm0, mm6); + psubsw_r2r(mm1, mm2); + movq_m2r(*(fdct_tg_all_16 + 4), mm1); + psubsw_r2r(mm4, mm0); + movq_m2r(*(in + offset + 3 * 8), mm7); + pmulhw_r2r(mm0, mm1); + paddsw_m2r(*(in + offset + 4 * 8), mm7); + psllw_i2r(SHIFT_FRW_COL, mm5); 
+ paddsw_r2r(mm4, mm6); + psllw_i2r(SHIFT_FRW_COL, mm7); + movq_r2r(mm5, mm4); + psubsw_r2r(mm7, mm5); + paddsw_r2r(mm5, mm1); + paddsw_r2r(mm7, mm4); + por_m2r(fdct_one_corr, mm1); + psllw_i2r(SHIFT_FRW_COL + 1, mm2); + pmulhw_m2r(*(fdct_tg_all_16 + 4), mm5); + movq_r2r(mm4, mm7); + psubsw_m2r(*(in + offset + 5 * 8), mm3); + psubsw_r2r(mm6, mm4); + movq_r2m(mm1, *(out + offset + 2 * 8)); + paddsw_r2r(mm6, mm7); + movq_m2r(*(in + offset + 3 * 8), mm1); + psllw_i2r(SHIFT_FRW_COL + 1, mm3); + psubsw_m2r(*(in + offset + 4 * 8), mm1); + movq_r2r(mm2, mm6); + movq_r2m(mm4, *(out + offset + 4 * 8)); + paddsw_r2r(mm3, mm2); + pmulhw_m2r(*ocos_4_16, mm2); + psubsw_r2r(mm3, mm6); + pmulhw_m2r(*ocos_4_16, mm6); + psubsw_r2r(mm0, mm5); + por_m2r(fdct_one_corr, mm5); + psllw_i2r(SHIFT_FRW_COL, mm1); + por_m2r(fdct_one_corr, mm2); + movq_r2r(mm1, mm4); + movq_m2r(*(in + offset + 0 * 8), mm3); + paddsw_r2r(mm6, mm1); + psubsw_m2r(*(in + offset + 7 * 8), mm3); + psubsw_r2r(mm6, mm4); + movq_m2r(*(fdct_tg_all_16 + 0), mm0); + psllw_i2r(SHIFT_FRW_COL, mm3); + movq_m2r(*(fdct_tg_all_16 + 8), mm6); + pmulhw_r2r(mm1, mm0); + movq_r2m(mm7, *(out + offset + 0 * 8)); + pmulhw_r2r(mm4, mm6); + movq_r2m(mm5, *(out + offset + 6 * 8)); + movq_r2r(mm3, mm7); + movq_m2r(*(fdct_tg_all_16 + 8), mm5); + psubsw_r2r(mm2, mm7); + paddsw_r2r(mm2, mm3); + pmulhw_r2r(mm7, mm5); + paddsw_r2r(mm3, mm0); + paddsw_r2r(mm4, mm6); + pmulhw_m2r(*(fdct_tg_all_16 + 0), mm3); + por_m2r(fdct_one_corr, mm0); + paddsw_r2r(mm7, mm5); + psubsw_r2r(mm6, mm7); + movq_r2m(mm0, *(out + offset + 1 * 8)); + paddsw_r2r(mm4, mm5); + movq_r2m(mm7, *(out + offset + 3 * 8)); + psubsw_r2r(mm1, mm3); + movq_r2m(mm5, *(out + offset + 5 * 8)); + movq_r2m(mm3, *(out + offset + 7 * 8)); +} + + +static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) +{ + asm volatile( + ".macro FDCT_ROW_SSE2_H1 i t \n\t" + "movq \\i(%0), %%xmm2 \n\t" + "movq \\i+8(%0), %%xmm0 \n\t" + "movdqa \\t+32(%1), %%xmm3 \n\t" + "movdqa 
\\t+48(%1), %%xmm7 \n\t" + "movdqa \\t(%1), %%xmm4 \n\t" + "movdqa \\t+16(%1), %%xmm5 \n\t" + ".endm \n\t" + ".macro FDCT_ROW_SSE2_H2 i t \n\t" + "movq \\i(%0), %%xmm2 \n\t" + "movq \\i+8(%0), %%xmm0 \n\t" + "movdqa \\t+32(%1), %%xmm3 \n\t" + "movdqa \\t+48(%1), %%xmm7 \n\t" + ".endm \n\t" + ".macro FDCT_ROW_SSE2 i \n\t" + "movq %%xmm2, %%xmm1 \n\t" + "pshuflw $27, %%xmm0, %%xmm0 \n\t" + "paddsw %%xmm0, %%xmm1 \n\t" + "psubsw %%xmm0, %%xmm2 \n\t" + "punpckldq %%xmm2, %%xmm1 \n\t" + "pshufd $78, %%xmm1, %%xmm2 \n\t" + "pmaddwd %%xmm2, %%xmm3 \n\t" + "pmaddwd %%xmm1, %%xmm7 \n\t" + "pmaddwd %%xmm5, %%xmm2 \n\t" + "pmaddwd %%xmm4, %%xmm1 \n\t" + "paddd %%xmm7, %%xmm3 \n\t" + "paddd %%xmm2, %%xmm1 \n\t" + "paddd %%xmm6, %%xmm3 \n\t" + "paddd %%xmm6, %%xmm1 \n\t" + "psrad %3, %%xmm3 \n\t" + "psrad %3, %%xmm1 \n\t" + "packssdw %%xmm3, %%xmm1 \n\t" + "movdqa %%xmm1, \\i(%4) \n\t" + ".endm \n\t" + "movdqa (%2), %%xmm6 \n\t" + "FDCT_ROW_SSE2_H1 0 0 \n\t" + "FDCT_ROW_SSE2 0 \n\t" + "FDCT_ROW_SSE2_H2 64 0 \n\t" + "FDCT_ROW_SSE2 64 \n\t" + + "FDCT_ROW_SSE2_H1 16 64 \n\t" + "FDCT_ROW_SSE2 16 \n\t" + "FDCT_ROW_SSE2_H2 112 64 \n\t" + "FDCT_ROW_SSE2 112 \n\t" + + "FDCT_ROW_SSE2_H1 32 128 \n\t" + "FDCT_ROW_SSE2 32 \n\t" + "FDCT_ROW_SSE2_H2 96 128 \n\t" + "FDCT_ROW_SSE2 96 \n\t" + + "FDCT_ROW_SSE2_H1 48 192 \n\t" + "FDCT_ROW_SSE2 48 \n\t" + "FDCT_ROW_SSE2_H2 80 192 \n\t" + "FDCT_ROW_SSE2 80 \n\t" + : + : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) + ); +} + +static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +{ + pshufw_m2r(*(in + 4), mm5, 0x1B); + movq_m2r(*(in + 0), mm0); + movq_r2r(mm0, mm1); + paddsw_r2r(mm5, mm0); + psubsw_r2r(mm5, mm1); + movq_r2r(mm0, mm2); + punpckldq_r2r(mm1, mm0); + punpckhdq_r2r(mm1, mm2); + movq_m2r(*(table + 0), mm1); + movq_m2r(*(table + 4), mm3); + movq_m2r(*(table + 8), mm4); + movq_m2r(*(table + 12), mm5); + 
movq_m2r(*(table + 16), mm6); + movq_m2r(*(table + 20), mm7); + pmaddwd_r2r(mm0, mm1); + pmaddwd_r2r(mm2, mm3); + pmaddwd_r2r(mm0, mm4); + pmaddwd_r2r(mm2, mm5); + pmaddwd_r2r(mm0, mm6); + pmaddwd_r2r(mm2, mm7); + pmaddwd_m2r(*(table + 24), mm0); + pmaddwd_m2r(*(table + 28), mm2); + paddd_r2r(mm1, mm3); + paddd_r2r(mm4, mm5); + paddd_r2r(mm6, mm7); + paddd_r2r(mm0, mm2); + movq_m2r(*fdct_r_row, mm0); + paddd_r2r(mm0, mm3); + paddd_r2r(mm0, mm5); + paddd_r2r(mm0, mm7); + paddd_r2r(mm0, mm2); + psrad_i2r(SHIFT_FRW_ROW, mm3); + psrad_i2r(SHIFT_FRW_ROW, mm5); + psrad_i2r(SHIFT_FRW_ROW, mm7); + psrad_i2r(SHIFT_FRW_ROW, mm2); + packssdw_r2r(mm5, mm3); + packssdw_r2r(mm2, mm7); + movq_r2m(mm3, *(out + 0)); + movq_r2m(mm7, *(out + 4)); +} + +static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table) +{ +//FIXME reorder (i dont have a old mmx only cpu here to benchmark ...) + movd_m2r(*(in + 6), mm1); + punpcklwd_m2r(*(in + 4), mm1); + movq_r2r(mm1, mm2); + psrlq_i2r(0x20, mm1); + movq_m2r(*(in + 0), mm0); + punpcklwd_r2r(mm2, mm1); + movq_r2r(mm0, mm5); + paddsw_r2r(mm1, mm0); + psubsw_r2r(mm1, mm5); + movq_r2r(mm0, mm2); + punpckldq_r2r(mm5, mm0); + punpckhdq_r2r(mm5, mm2); + movq_m2r(*(table + 0), mm1); + movq_m2r(*(table + 4), mm3); + movq_m2r(*(table + 8), mm4); + movq_m2r(*(table + 12), mm5); + movq_m2r(*(table + 16), mm6); + movq_m2r(*(table + 20), mm7); + pmaddwd_r2r(mm0, mm1); + pmaddwd_r2r(mm2, mm3); + pmaddwd_r2r(mm0, mm4); + pmaddwd_r2r(mm2, mm5); + pmaddwd_r2r(mm0, mm6); + pmaddwd_r2r(mm2, mm7); + pmaddwd_m2r(*(table + 24), mm0); + pmaddwd_m2r(*(table + 28), mm2); + paddd_r2r(mm1, mm3); + paddd_r2r(mm4, mm5); + paddd_r2r(mm6, mm7); + paddd_r2r(mm0, mm2); + movq_m2r(*fdct_r_row, mm0); + paddd_r2r(mm0, mm3); + paddd_r2r(mm0, mm5); + paddd_r2r(mm0, mm7); + paddd_r2r(mm0, mm2); + psrad_i2r(SHIFT_FRW_ROW, mm3); + psrad_i2r(SHIFT_FRW_ROW, mm5); + psrad_i2r(SHIFT_FRW_ROW, mm7); + psrad_i2r(SHIFT_FRW_ROW, mm2); + packssdw_r2r(mm5, 
mm3); + packssdw_r2r(mm2, mm7); + movq_r2m(mm3, *(out + 0)); + movq_r2m(mm7, *(out + 4)); +} + +void ff_fdct_mmx(int16_t *block) +{ + int64_t align_tmp[16] ATTR_ALIGN(8); + int16_t * const block_tmp= (int16_t*)align_tmp; + int16_t *block1, *out; + const int16_t *table; + int i; + + block1 = block_tmp; + fdct_col(block, block1, 0); + fdct_col(block, block1, 4); + + block1 = block_tmp; + table = tab_frw_01234567; + out = block; + for(i=8;i>0;i--) { + fdct_row_mmx(block1, out, table); + block1 += 8; + table += 32; + out += 8; + } +} + +void ff_fdct_mmx2(int16_t *block) +{ + int64_t align_tmp[16] ATTR_ALIGN(8); + int16_t * const block_tmp= (int16_t*)align_tmp; + int16_t *block1, *out; + const int16_t *table; + int i; + + block1 = block_tmp; + fdct_col(block, block1, 0); + fdct_col(block, block1, 4); + + block1 = block_tmp; + table = tab_frw_01234567; + out = block; + for(i=8;i>0;i--) { + fdct_row_mmx2(block1, out, table); + block1 += 8; + table += 32; + out += 8; + } +} + +void ff_fdct_sse2(int16_t *block) +{ + int64_t align_tmp[16] ATTR_ALIGN(8); + int16_t * const block_tmp= (int16_t*)align_tmp; + int16_t *block1; + + block1 = block_tmp; + fdct_col(block, block1, 0); + fdct_col(block, block1, 4); + + fdct_row_sse2(block1, block); +} + diff --git a/mpeg4/src/libavcodec/i386/fft_3dn.c b/mpeg4/src/libavcodec/i386/fft_3dn.c new file mode 100644 index 0000000000000000000000000000000000000000..16595bddee629cad807ef5a00f1390fa0e7415ec --- /dev/null +++ b/mpeg4/src/libavcodec/i386/fft_3dn.c @@ -0,0 +1,136 @@ +/* + * FFT/MDCT transform with 3DNow! optimizations + * Copyright (c) 2006 Zuxy MENG Jie. + * Based on fft_sse.c copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "../dsputil.h" +#include + +#ifdef HAVE_MM3DNOW + +#include + +static const int p1m1[2] __attribute__((aligned(8))) = + { 0, 1 << 31 }; + +static const int m1p1[2] __attribute__((aligned(8))) = + { 1 << 31, 0 }; + +void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + /* FEMMS not a must here but recommended by AMD */ + _m_femms(); + + { + __m64 *r, a0, a1, b0, b1, tmp, c; + + r = (__m64 *)&z[0]; + if (s->inverse) + c = *(__m64 *)m1p1; + else + c = *(__m64 *)p1m1; + + j = (np >> 2); + do { + /* do the pass 0 butterfly */ + a0 = _m_pfadd(r[0], r[1]); + a1 = _m_pfsub(r[0], r[1]); + + /* do the pass 0 butterfly */ + b0 = _m_pfadd(r[2], r[3]); + b1 = _m_pfsub(r[2], r[3]); + + /* multiply third by -i */ + tmp = _m_punpckhdq(b1, b1); + b1 = _m_punpckldq(b1, b1); + b1 = _m_punpckldq(tmp, b1); + b1 = _m_pxor(b1, c); + + /* do the pass 1 butterfly */ + r[0] = _m_pfadd(a0, b0); + r[1] = _m_pfadd(a1, b1); + r[2] = _m_pfsub(a0, b0); + r[3] = _m_pfsub(a1, b1); + r += 4; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + __m64 a0, a1, b0, b1, c0, c1, t10, t11, t20, t21; + + a0 = *(__m64 *)&p[0]; + a1 = *(__m64 *)&p[1]; + b0 = *(__m64 *)&q[0]; + b1 = *(__m64 *)&q[1]; + + /* complex mul */ + c0 = *(__m64 *)&cptr[0]; + c1 = *(__m64 *)&cptr[1]; + /* cre*re cim*re */ + t10 = _m_pfmul(c0, _m_punpckldq(b0, b0)); + t11 = _m_pfmul(c1, _m_punpckldq(b1, b1)); + c0 = *(__m64 *)&cptr[2]; + c1 = *(__m64 *)&cptr[3]; + /* -cim*im cre*im */ + t20 = _m_pfmul(c0, _m_punpckhdq(b0, b0)); + t21 = _m_pfmul(c1, _m_punpckhdq(b1, b1)); + b0 = _m_pfadd(t10, t20); + b1 = _m_pfadd(t11, t21); + + /* butterfly */ + *(__m64 *)&p[0] = _m_pfadd(a0, b0); + *(__m64 *)&p[1] = _m_pfadd(a1, b1); + *(__m64 *)&q[0] = _m_pfsub(a0, b0); + *(__m64 *)&q[1] = _m_pfsub(a1, b1); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); + _m_femms(); +} + +#endif diff --git a/mpeg4/src/libavcodec/i386/fft_3dn2.c b/mpeg4/src/libavcodec/i386/fft_3dn2.c new file mode 100644 index 0000000000000000000000000000000000000000..aa8f0aee2e903b1d47a134c4bed6cc7f926565cc --- /dev/null +++ b/mpeg4/src/libavcodec/i386/fft_3dn2.c @@ -0,0 +1,136 @@ +/* + * FFT/MDCT transform with Extended 3DNow! optimizations + * Copyright (c) 2006 Zuxy MENG Jie. + * Based on fft_sse.c copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "../dsputil.h" +#include + +#ifdef HAVE_MM3DNOW + +#include + +static const int p1m1[2] __attribute__((aligned(8))) = + { 0, 1 << 31 }; + +static const int m1p1[2] __attribute__((aligned(8))) = + { 1 << 31, 0 }; + +void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + /* FEMMS is not a must here but recommended by AMD */ + _m_femms(); + + { + __m64 *r, a0, a1, b0, b1, c; + + r = (__m64 *)&z[0]; + if (s->inverse) + c = *(__m64 *)m1p1; + else + c = *(__m64 *)p1m1; + + j = (np >> 2); + do { + /* do the pass 0 butterfly */ + a0 = _m_pfadd(r[0], r[1]); + a1 = _m_pfsub(r[0], r[1]); + + /* do the pass 0 butterfly */ + b0 = _m_pfadd(r[2], r[3]); + b1 = _m_pfsub(r[2], r[3]); + + /* multiply third by -i */ + b1 = _m_pswapd(b1); + b1 = _m_pxor(b1, c); + + r[0] = _m_pfadd(a0, b0); + r[1] = _m_pfadd(a1, b1); + r[2] = _m_pfsub(a0, b0); + r[3] = _m_pfsub(a1, b1); + r += 4; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + __m64 a0, a1, b0, b1, c0, c1, t10, t11, t20, t21; + + a0 = *(__m64 *)&p[0]; + a1 = *(__m64 *)&p[1]; + b0 = *(__m64 *)&q[0]; + b1 = *(__m64 *)&q[1]; + + /* complex mul */ + c0 = *(__m64 *)&cptr[0]; + c1 = *(__m64 *)&cptr[1]; + /* cre*re cim*im */ + t10 = _m_pfmul(c0, b0); + t11 = _m_pfmul(c1, b1); + /* no need to access cptr[2] & cptr[3] */ + c0 = _m_pswapd(c0); + c1 = _m_pswapd(c1); + /* cim*re cre*im */ + t20 = _m_pfmul(c0, b0); + t21 = _m_pfmul(c1, b1); + + /* cre*re-cim*im cim*re+cre*im */ + b0 = _m_pfpnacc(t10, t20); + b1 = _m_pfpnacc(t11, t21); + + /* butterfly */ + *(__m64 *)&p[0] = _m_pfadd(a0, b0); + *(__m64 *)&p[1] = _m_pfadd(a1, b1); + *(__m64 *)&q[0] = _m_pfsub(a0, b0); + *(__m64 *)&q[1] = _m_pfsub(a1, b1); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); + _m_femms(); +} + +#endif diff --git a/mpeg4/src/libavcodec/i386/fft_sse.c b/mpeg4/src/libavcodec/i386/fft_sse.c new file mode 100644 index 0000000000000000000000000000000000000000..631848265a27b9d7f5adf80fb7cf12383d653a36 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/fft_sse.c @@ -0,0 +1,140 @@ +/* + * FFT/MDCT transform with SSE optimizations + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "../dsputil.h" +#include + +#ifdef HAVE_BUILTIN_VECTOR + +#include + +static const int p1p1p1m1[4] __attribute__((aligned(16))) = + { 0, 0, 0, 1 << 31 }; + +static const int p1p1m1p1[4] __attribute__((aligned(16))) = + { 0, 0, 1 << 31, 0 }; + +static const int p1p1m1m1[4] __attribute__((aligned(16))) = + { 0, 0, 1 << 31, 1 << 31 }; + +#if 0 +static void print_v4sf(const char *str, __m128 a) +{ + float *p = (float *)&a; + printf("%s: %f %f %f %f\n", + str, p[0], p[1], p[2], p[3]); +} +#endif + +/* XXX: handle reverse case */ +void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + + { + __m128 *r, a, b, a1, c1, c2; + + r = (__m128 *)&z[0]; + c1 = *(__m128 *)p1p1m1m1; + if (s->inverse) + c2 = *(__m128 *)p1p1m1p1; + else + c2 = *(__m128 *)p1p1p1m1; + + j = (np >> 2); + do { + a = r[0]; + b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + a = _mm_xor_ps(a, c1); + /* do the pass 0 butterfly */ + a = _mm_add_ps(a, b); + + a1 = r[1]; + b = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 0, 3, 2)); + a1 = _mm_xor_ps(a1, c1); + /* do the pass 0 butterfly */ + b = _mm_add_ps(a1, b); + + /* multiply third by -i */ + /* by toggling the sign bit */ + b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 1, 0)); + b = _mm_xor_ps(b, c2); + + /* do the pass 1 butterfly */ + r[0] = _mm_add_ps(a, b); + r[1] = _mm_sub_ps(a, b); + r += 2; + } while (--j != 0); + } + /* pass 2 .. 
ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + __m128 a, b, c, t1, t2; + + a = *(__m128 *)p; + b = *(__m128 *)q; + + /* complex mul */ + c = *(__m128 *)cptr; + /* cre*re cim*re */ + t1 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0))); + c = *(__m128 *)(cptr + 2); + /* -cim*im cre*im */ + t2 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1))); + b = _mm_add_ps(t1, t2); + + /* butterfly */ + *(__m128 *)p = _mm_add_ps(a, b); + *(__m128 *)q = _mm_sub_ps(a, b); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +#endif diff --git a/mpeg4/src/libavcodec/i386/h264dsp_mmx.c b/mpeg4/src/libavcodec/i386/h264dsp_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..aedde696a440b1886cd851217c5d04e8f4949c0e --- /dev/null +++ b/mpeg4/src/libavcodec/i386/h264dsp_mmx.c @@ -0,0 +1,1433 @@ +/* + * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +/***********************************/ +/* IDCT */ + +/* in/out: mma=mma+mmb, mmb=mmb-mma */ +#define SUMSUB_BA( a, b ) \ + "paddw "#b", "#a" \n\t"\ + "paddw "#b", "#b" \n\t"\ + "psubw "#a", "#b" \n\t" + +#define SUMSUB_BADC( a, b, c, d ) \ + "paddw "#b", "#a" \n\t"\ + "paddw "#d", "#c" \n\t"\ + "paddw "#b", "#b" \n\t"\ + "paddw "#d", "#d" \n\t"\ + "psubw "#a", "#b" \n\t"\ + "psubw "#c", "#d" \n\t" + +#define SUMSUBD2_AB( a, b, t ) \ + "movq "#b", "#t" \n\t"\ + "psraw $1 , "#b" \n\t"\ + "paddw "#a", "#b" \n\t"\ + "psraw $1 , "#a" \n\t"\ + "psubw "#t", "#a" \n\t" + +#define IDCT4_1D( s02, s13, d02, d13, t ) \ + SUMSUB_BA ( s02, d02 )\ + SUMSUBD2_AB( s13, d13, t )\ + SUMSUB_BADC( d13, s02, s13, d02 ) + +#define SBUTTERFLY(a,b,t,n)\ + "movq " #a ", " #t " \n\t" /* abcd */\ + "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ + "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ + +#define TRANSPOSE4(a,b,c,d,t)\ + SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ + SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\ + SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\ + SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */ + +#define STORE_DIFF_4P( p, t, z ) \ + "psraw $6, "#p" \n\t"\ + "movd (%0), "#t" \n\t"\ + "punpcklbw "#z", "#t" \n\t"\ + "paddsw "#t", "#p" \n\t"\ + "packuswb "#z", "#p" \n\t"\ + "movd "#p", (%0) \n\t" + +static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) +{ + /* Load dct coeffs */ + asm volatile( + "movq (%0), %%mm0 \n\t" + "movq 8(%0), %%mm1 \n\t" + "movq 16(%0), %%mm2 \n\t" + "movq 24(%0), %%mm3 \n\t" + :: "r"(block) ); + + asm volatile( + /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */ + IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 ) + + "movq %0, %%mm6 \n\t" + /* in: 1,4,0,2 out: 1,2,3,0 */ + TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, 
%%mm4 ) + + "paddw %%mm6, %%mm3 \n\t" + + /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */ + IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 ) + + "pxor %%mm7, %%mm7 \n\t" + :: "m"(ff_pw_32)); + + asm volatile( + STORE_DIFF_4P( %%mm0, %%mm1, %%mm7) + "add %1, %0 \n\t" + STORE_DIFF_4P( %%mm2, %%mm1, %%mm7) + "add %1, %0 \n\t" + STORE_DIFF_4P( %%mm3, %%mm1, %%mm7) + "add %1, %0 \n\t" + STORE_DIFF_4P( %%mm4, %%mm1, %%mm7) + : "+r"(dst) + : "r" ((long)stride) + ); +} + +static inline void h264_idct8_1d(int16_t *block) +{ + asm volatile( + "movq 112(%0), %%mm7 \n\t" + "movq 80(%0), %%mm5 \n\t" + "movq 48(%0), %%mm3 \n\t" + "movq 16(%0), %%mm1 \n\t" + + "movq %%mm7, %%mm4 \n\t" + "movq %%mm3, %%mm6 \n\t" + "movq %%mm5, %%mm0 \n\t" + "movq %%mm7, %%mm2 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm6, %%mm2 \n\t" + "psubw %%mm4, %%mm0 \n\t" + "psubw %%mm3, %%mm2 \n\t" + "psubw %%mm3, %%mm0 \n\t" + "paddw %%mm1, %%mm2 \n\t" + + "movq %%mm5, %%mm4 \n\t" + "movq %%mm1, %%mm6 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "paddw %%mm5, %%mm4 \n\t" + "paddw %%mm1, %%mm6 \n\t" + "paddw %%mm7, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psubw %%mm1, %%mm4 \n\t" + "paddw %%mm3, %%mm6 \n\t" + + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm3 \n\t" + "movq %%mm2, %%mm5 \n\t" + "movq %%mm6, %%mm7 \n\t" + "psraw $2, %%mm6 \n\t" + "psraw $2, %%mm3 \n\t" + "psraw $2, %%mm5 \n\t" + "psraw $2, %%mm0 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "psubw %%mm4, %%mm5 \n\t" + "psubw %%mm0, %%mm7 \n\t" + + "movq 32(%0), %%mm2 \n\t" + "movq 96(%0), %%mm6 \n\t" + "movq %%mm2, %%mm4 \n\t" + "movq %%mm6, %%mm0 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "psubw %%mm0, %%mm4 \n\t" + "paddw %%mm2, %%mm6 \n\t" + + "movq (%0), %%mm2 \n\t" + "movq 64(%0), %%mm0 \n\t" + SUMSUB_BA( %%mm0, %%mm2 ) + SUMSUB_BA( %%mm6, %%mm0 ) + SUMSUB_BA( %%mm4, %%mm2 ) + SUMSUB_BA( %%mm7, %%mm6 ) + SUMSUB_BA( %%mm5, %%mm4 ) + 
SUMSUB_BA( %%mm3, %%mm2 ) + SUMSUB_BA( %%mm1, %%mm0 ) + :: "r"(block) + ); +} + +static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) +{ + int i; + int16_t __attribute__ ((aligned(8))) b2[64]; + + block[0] += 32; + + for(i=0; i<2; i++){ + uint64_t tmp; + + h264_idct8_1d(block+4*i); + + asm volatile( + "movq %%mm7, %0 \n\t" + TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) + "movq %%mm0, 8(%1) \n\t" + "movq %%mm6, 24(%1) \n\t" + "movq %%mm7, 40(%1) \n\t" + "movq %%mm4, 56(%1) \n\t" + "movq %0, %%mm7 \n\t" + TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 ) + "movq %%mm7, (%1) \n\t" + "movq %%mm1, 16(%1) \n\t" + "movq %%mm0, 32(%1) \n\t" + "movq %%mm3, 48(%1) \n\t" + : "=m"(tmp) + : "r"(b2+32*i) + : "memory" + ); + } + + for(i=0; i<2; i++){ + h264_idct8_1d(b2+4*i); + + asm volatile( + "psraw $6, %%mm7 \n\t" + "psraw $6, %%mm6 \n\t" + "psraw $6, %%mm5 \n\t" + "psraw $6, %%mm4 \n\t" + "psraw $6, %%mm3 \n\t" + "psraw $6, %%mm2 \n\t" + "psraw $6, %%mm1 \n\t" + "psraw $6, %%mm0 \n\t" + + "movq %%mm7, (%0) \n\t" + "movq %%mm5, 16(%0) \n\t" + "movq %%mm3, 32(%0) \n\t" + "movq %%mm1, 48(%0) \n\t" + "movq %%mm0, 64(%0) \n\t" + "movq %%mm2, 80(%0) \n\t" + "movq %%mm4, 96(%0) \n\t" + "movq %%mm6, 112(%0) \n\t" + :: "r"(b2+4*i) + : "memory" + ); + } + + add_pixels_clamped_mmx(b2, dst, stride); +} + +static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) +{ + int dc = (block[0] + 32) >> 6; + asm volatile( + "movd %0, %%mm0 \n\t" + "pshufw $0, %%mm0, %%mm0 \n\t" + "pxor %%mm1, %%mm1 \n\t" + "psubw %%mm0, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + ::"r"(dc) + ); + asm volatile( + "movd %0, %%mm2 \n\t" + "movd %1, %%mm3 \n\t" + "movd %2, %%mm4 \n\t" + "movd %3, %%mm5 \n\t" + "paddusb %%mm0, %%mm2 \n\t" + "paddusb %%mm0, %%mm3 \n\t" + "paddusb %%mm0, %%mm4 \n\t" + "paddusb %%mm0, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm1, %%mm5 
\n\t" + "movd %%mm2, %0 \n\t" + "movd %%mm3, %1 \n\t" + "movd %%mm4, %2 \n\t" + "movd %%mm5, %3 \n\t" + :"+m"(*(uint32_t*)(dst+0*stride)), + "+m"(*(uint32_t*)(dst+1*stride)), + "+m"(*(uint32_t*)(dst+2*stride)), + "+m"(*(uint32_t*)(dst+3*stride)) + ); +} + +static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) +{ + int dc = (block[0] + 32) >> 6; + int y; + asm volatile( + "movd %0, %%mm0 \n\t" + "pshufw $0, %%mm0, %%mm0 \n\t" + "pxor %%mm1, %%mm1 \n\t" + "psubw %%mm0, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + ::"r"(dc) + ); + for(y=2; y--; dst += 4*stride){ + asm volatile( + "movq %0, %%mm2 \n\t" + "movq %1, %%mm3 \n\t" + "movq %2, %%mm4 \n\t" + "movq %3, %%mm5 \n\t" + "paddusb %%mm0, %%mm2 \n\t" + "paddusb %%mm0, %%mm3 \n\t" + "paddusb %%mm0, %%mm4 \n\t" + "paddusb %%mm0, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm1, %%mm5 \n\t" + "movq %%mm2, %0 \n\t" + "movq %%mm3, %1 \n\t" + "movq %%mm4, %2 \n\t" + "movq %%mm5, %3 \n\t" + :"+m"(*(uint64_t*)(dst+0*stride)), + "+m"(*(uint64_t*)(dst+1*stride)), + "+m"(*(uint64_t*)(dst+2*stride)), + "+m"(*(uint64_t*)(dst+3*stride)) + ); + } +} + + +/***********************************/ +/* deblocking */ + +// out: o = |x-y|>a +// clobbers: t +#define DIFF_GT_MMX(x,y,a,o,t)\ + "movq "#y", "#t" \n\t"\ + "movq "#x", "#o" \n\t"\ + "psubusb "#x", "#t" \n\t"\ + "psubusb "#y", "#o" \n\t"\ + "por "#t", "#o" \n\t"\ + "psubusb "#a", "#o" \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 +// out: mm5=beta-1, mm7=mask +// clobbers: mm4,mm6 +#define H264_DEBLOCK_MASK(alpha1, beta1) \ + "pshufw $0, "#alpha1", %%mm4 \n\t"\ + "pshufw $0, "#beta1 ", %%mm5 \n\t"\ + "packuswb %%mm4, %%mm4 \n\t"\ + "packuswb %%mm5, %%mm5 \n\t"\ + DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\ + DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\ + "por %%mm4, %%mm7 \n\t"\ + DIFF_GT_MMX(%%mm3, %%mm2, 
%%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\ + "por %%mm4, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "pcmpeqb %%mm6, %%mm7 \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) +// out: mm1=p0' mm2=q0' +// clobbers: mm0,3-6 +#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\ + /* a = q0^p0^((p1-q1)>>2) */\ + "movq %%mm0, %%mm4 \n\t"\ + "psubb %%mm3, %%mm4 \n\t"\ + "psrlw $2, %%mm4 \n\t"\ + "pxor %%mm1, %%mm4 \n\t"\ + "pxor %%mm2, %%mm4 \n\t"\ + /* b = p0^(q1>>2) */\ + "psrlw $2, %%mm3 \n\t"\ + "pand "#pb_3f", %%mm3 \n\t"\ + "movq %%mm1, %%mm5 \n\t"\ + "pxor %%mm3, %%mm5 \n\t"\ + /* c = q0^(p1>>2) */\ + "psrlw $2, %%mm0 \n\t"\ + "pand "#pb_3f", %%mm0 \n\t"\ + "movq %%mm2, %%mm6 \n\t"\ + "pxor %%mm0, %%mm6 \n\t"\ + /* d = (c^b) & ~(b^a) & 1 */\ + "pxor %%mm5, %%mm6 \n\t"\ + "pxor %%mm4, %%mm5 \n\t"\ + "pandn %%mm6, %%mm5 \n\t"\ + "pand "#pb_01", %%mm5 \n\t"\ + /* delta = (avg(q0, p1>>2) + (d&a)) + * - (avg(p0, q1>>2) + (d&~a)) */\ + "pavgb %%mm2, %%mm0 \n\t"\ + "pand %%mm5, %%mm4 \n\t"\ + "paddusb %%mm4, %%mm0 \n\t"\ + "pavgb %%mm1, %%mm3 \n\t"\ + "pxor %%mm5, %%mm4 \n\t"\ + "paddusb %%mm4, %%mm3 \n\t"\ + /* p0 += clip(delta, -tc0, tc0) + * q0 -= clip(delta, -tc0, tc0) */\ + "movq %%mm0, %%mm4 \n\t"\ + "psubusb %%mm3, %%mm0 \n\t"\ + "psubusb %%mm4, %%mm3 \n\t"\ + "pminub %%mm7, %%mm0 \n\t"\ + "pminub %%mm7, %%mm3 \n\t"\ + "paddusb %%mm0, %%mm1 \n\t"\ + "paddusb %%mm3, %%mm2 \n\t"\ + "psubusb %%mm3, %%mm1 \n\t"\ + "psubusb %%mm0, %%mm2 \n\t" + +// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=mm_bone +// out: (q1addr) = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 ) +// clobbers: q2, tmp, tc0 +#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\ + "movq %%mm1, "#tmp" \n\t"\ + "pavgb %%mm2, "#tmp" \n\t"\ + "pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\ + "pxor "q2addr", "#tmp" \n\t"\ + "pand %8, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\ + "psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\ + "movq "#p1", "#tmp" \n\t"\ + "psubusb "#tc0", "#tmp" \n\t"\ + 
"paddusb "#p1", "#tc0" \n\t"\ + "pmaxub "#tmp", "#q2" \n\t"\ + "pminub "#tc0", "#q2" \n\t"\ + "movq "#q2", "q1addr" \n\t" + +static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) +{ + uint64_t tmp0; + uint64_t tc = (uint8_t)tc0[1]*0x01010000 | (uint8_t)tc0[0]*0x0101; + // with luma, tc0=0 doesn't mean no filtering, so we need a separate input mask + uint32_t mask[2] = { (tc0[0]>=0)*0xffffffff, (tc0[1]>=0)*0xffffffff }; + + asm volatile( + "movq (%1,%3), %%mm0 \n\t" //p1 + "movq (%1,%3,2), %%mm1 \n\t" //p0 + "movq (%2), %%mm2 \n\t" //q0 + "movq (%2,%3), %%mm3 \n\t" //q1 + H264_DEBLOCK_MASK(%6, %7) + "pand %5, %%mm7 \n\t" + "movq %%mm7, %0 \n\t" + + /* filter p1 */ + "movq (%1), %%mm3 \n\t" //p2 + DIFF_GT_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1 + "pandn %%mm7, %%mm6 \n\t" + "pcmpeqb %%mm7, %%mm6 \n\t" + "pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|beta-1 + "pandn %0, %%mm6 \n\t" + "pcmpeqb %0, %%mm6 \n\t" + "pand %0, %%mm6 \n\t" + "pshufw $80, %4, %%mm5 \n\t" + "pand %%mm6, %%mm5 \n\t" + "pand %8, %%mm6 \n\t" + "paddb %%mm6, %%mm7 \n\t" + "movq (%2,%3), %%mm3 \n\t" + H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6) + + /* filter p0, q0 */ + H264_DEBLOCK_P0_Q0(%8, %9) + "movq %%mm1, (%1,%3,2) \n\t" + "movq %%mm2, (%2) \n\t" + + : "=m"(tmp0) + : "r"(pix-3*stride), "r"(pix), "r"((long)stride), + "m"(tc), "m"(*(uint64_t*)mask), "m"(alpha1), "m"(beta1), + "m"(mm_bone), "m"(ff_pb_3F) + ); +} + +static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + if((tc0[0] & tc0[1]) >= 0) + h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0); + if((tc0[2] & tc0[3]) >= 0) + h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2); +} +static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + //FIXME: could cut some load/stores by merging transpose with filter + // also, it only needs to 
transpose 6x8 + uint8_t trans[8*8]; + int i; + for(i=0; i<2; i++, pix+=8*stride, tc0+=2) { + if((tc0[0] & tc0[1]) < 0) + continue; + transpose4x4(trans, pix-4, 8, stride); + transpose4x4(trans +4*8, pix, 8, stride); + transpose4x4(trans+4, pix-4+4*stride, 8, stride); + transpose4x4(trans+4+4*8, pix +4*stride, 8, stride); + h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0); + transpose4x4(pix-2, trans +2*8, stride, 8); + transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8); + } +} + +static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) +{ + asm volatile( + "movq (%0), %%mm0 \n\t" //p1 + "movq (%0,%2), %%mm1 \n\t" //p0 + "movq (%1), %%mm2 \n\t" //q0 + "movq (%1,%2), %%mm3 \n\t" //q1 + H264_DEBLOCK_MASK(%4, %5) + "movd %3, %%mm6 \n\t" + "punpcklbw %%mm6, %%mm6 \n\t" + "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask + H264_DEBLOCK_P0_Q0(%6, %7) + "movq %%mm1, (%0,%2) \n\t" + "movq %%mm2, (%1) \n\t" + + :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), + "r"(*(uint32_t*)tc0), + "m"(alpha1), "m"(beta1), "m"(mm_bone), "m"(ff_pb_3F) + ); +} + +static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0); +} + +static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +{ + //FIXME: could cut some load/stores by merging transpose with filter + uint8_t trans[8*4]; + transpose4x4(trans, pix-2, 8, stride); + transpose4x4(trans+4, pix-2+4*stride, 8, stride); + h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0); + transpose4x4(pix-2, trans, stride, 8); + transpose4x4(pix-2+4*stride, trans+4, stride, 8); +} + +// p0 = (p0 + q1 + 2*p1 + 2) >> 2 +#define H264_FILTER_CHROMA4(p0, p1, q1, one) \ + "movq "#p0", %%mm4 \n\t"\ + "pxor "#q1", %%mm4 \n\t"\ + "pand "#one", %%mm4 \n\t" /* mm4 = (p0^q1)&1 */\ + "pavgb "#q1", "#p0" \n\t"\ + "psubusb %%mm4, "#p0" 
\n\t"\ + "pavgb "#p1", "#p0" \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\ + +static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) +{ + asm volatile( + "movq (%0), %%mm0 \n\t" + "movq (%0,%2), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" + "movq (%1,%2), %%mm3 \n\t" + H264_DEBLOCK_MASK(%3, %4) + "movq %%mm1, %%mm5 \n\t" + "movq %%mm2, %%mm6 \n\t" + H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0' + H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0' + "psubb %%mm5, %%mm1 \n\t" + "psubb %%mm6, %%mm2 \n\t" + "pand %%mm7, %%mm1 \n\t" + "pand %%mm7, %%mm2 \n\t" + "paddb %%mm5, %%mm1 \n\t" + "paddb %%mm6, %%mm2 \n\t" + "movq %%mm1, (%0,%2) \n\t" + "movq %%mm2, (%1) \n\t" + :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), + "m"(alpha1), "m"(beta1), "m"(mm_bone) + ); +} + +static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) +{ + h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1); +} + +static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta) +{ + //FIXME: could cut some load/stores by merging transpose with filter + uint8_t trans[8*4]; + transpose4x4(trans, pix-2, 8, stride); + transpose4x4(trans+4, pix-2+4*stride, 8, stride); + h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1); + transpose4x4(pix-2, trans, stride, 8); + transpose4x4(pix-2+4*stride, trans+4, stride, 8); +} + + +/***********************************/ +/* motion compensation */ + +#define QPEL_H264V(A,B,C,D,E,F,OP)\ + "movd (%0), "#F" \n\t"\ + "movq "#C", %%mm6 \n\t"\ + "paddw "#D", %%mm6 \n\t"\ + "psllw $2, %%mm6 \n\t"\ + "psubw "#B", %%mm6 \n\t"\ + "psubw "#E", %%mm6 \n\t"\ + "pmullw %4, %%mm6 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, "#F" \n\t"\ + "paddw %5, "#A" \n\t"\ + "paddw "#F", "#A" \n\t"\ + "paddw "#A", %%mm6 \n\t"\ + "psraw $5, %%mm6 \n\t"\ + "packuswb %%mm6, %%mm6 \n\t"\ + OP(%%mm6, (%1), A, d)\ + "add %3, %1 \n\t" + +#define 
QPEL_H264HV(A,B,C,D,E,F,OF)\ + "movd (%0), "#F" \n\t"\ + "movq "#C", %%mm6 \n\t"\ + "paddw "#D", %%mm6 \n\t"\ + "psllw $2, %%mm6 \n\t"\ + "psubw "#B", %%mm6 \n\t"\ + "psubw "#E", %%mm6 \n\t"\ + "pmullw %3, %%mm6 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, "#F" \n\t"\ + "paddw "#F", "#A" \n\t"\ + "paddw "#A", %%mm6 \n\t"\ + "movq %%mm6, "#OF"(%1) \n\t" + +#define QPEL_H264(OPNAME, OP, MMX)\ +static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + int h=4;\ +\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %5, %%mm4 \n\t"\ + "movq %6, %%mm5 \n\t"\ + "1: \n\t"\ + "movd -1(%0), %%mm1 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "movd 1(%0), %%mm3 \n\t"\ + "movd 2(%0), %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "paddw %%mm0, %%mm1 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "movd -2(%0), %%mm0 \n\t"\ + "movd 3(%0), %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm3, %%mm0 \n\t"\ + "psllw $2, %%mm2 \n\t"\ + "psubw %%mm1, %%mm2 \n\t"\ + "pmullw %%mm4, %%mm2 \n\t"\ + "paddw %%mm5, %%mm0 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm6, d)\ + "add %3, %0 \n\t"\ + "add %4, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(src), "+c"(dst), "+m"(h)\ + : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + int h=4;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %0, %%mm4 \n\t"\ + "movq %1, %%mm5 \n\t"\ + :: "m"(ff_pw_5), "m"(ff_pw_16)\ + );\ + do{\ + asm volatile(\ + "movd -1(%0), %%mm1 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "movd 1(%0), %%mm3 \n\t"\ + "movd 2(%0), %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, 
%%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "paddw %%mm0, %%mm1 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "movd -2(%0), %%mm0 \n\t"\ + "movd 3(%0), %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm3, %%mm0 \n\t"\ + "psllw $2, %%mm2 \n\t"\ + "psubw %%mm1, %%mm2 \n\t"\ + "pmullw %%mm4, %%mm2 \n\t"\ + "paddw %%mm5, %%mm0 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "movd (%2), %%mm3 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + PAVGB" %%mm3, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm6, d)\ + "add %4, %0 \n\t"\ + "add %4, %1 \n\t"\ + "add %3, %2 \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2)\ + : "D"((long)src2Stride), "S"((long)dstStride)\ + : "memory"\ + );\ + }while(--h);\ +}\ +static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + src -= 2*srcStride;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + int h=4;\ + int w=3;\ + src -= 2*srcStride+2;\ + while(w--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + 
"add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\ + \ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + tmp += 4;\ + src += 4 - 9*srcStride;\ + }\ + tmp -= 3*4;\ + asm volatile(\ + "movq %4, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "paddw 10(%0), %%mm0 \n\t"\ + "movq 2(%0), %%mm1 \n\t"\ + "paddw 8(%0), %%mm1 \n\t"\ + "movq 4(%0), %%mm2 \n\t"\ + "paddw 6(%0), %%mm2 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ + "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ + "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ + "paddsw %%mm2, %%mm0 \n\t"\ + "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\ + "paddw %%mm6, %%mm2 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 +32 */\ + "psraw $6, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm7, d)\ + "add $24, %0 \n\t"\ + "add %3, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(tmp), "+c"(dst), "+m"(h)\ + : "S"((long)dstStride), "m"(ff_pw_32)\ + : "memory"\ + );\ +}\ +\ +static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + int h=8;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %5, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "movq 1(%0), %%mm2 \n\t"\ + "movq %%mm0, %%mm1 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpckhbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, 
%%mm3 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "psllw $2, %%mm0 \n\t"\ + "psllw $2, %%mm1 \n\t"\ + "movq -1(%0), %%mm2 \n\t"\ + "movq 2(%0), %%mm4 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "movq %%mm4, %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm4, %%mm2 \n\t"\ + "paddw %%mm3, %%mm5 \n\t"\ + "psubw %%mm2, %%mm0 \n\t"\ + "psubw %%mm5, %%mm1 \n\t"\ + "pmullw %%mm6, %%mm0 \n\t"\ + "pmullw %%mm6, %%mm1 \n\t"\ + "movd -2(%0), %%mm2 \n\t"\ + "movd 7(%0), %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "movq %6, %%mm5 \n\t"\ + "paddw %%mm5, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm4, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm5, q)\ + "add %3, %0 \n\t"\ + "add %4, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(src), "+c"(dst), "+m"(h)\ + : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +\ +static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + int h=8;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %0, %%mm6 \n\t"\ + :: "m"(ff_pw_5)\ + );\ + do{\ + asm volatile(\ + "movq (%0), %%mm0 \n\t"\ + "movq 1(%0), %%mm2 \n\t"\ + "movq %%mm0, %%mm1 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpckhbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "psllw $2, %%mm0 \n\t"\ + "psllw $2, %%mm1 \n\t"\ + "movq -1(%0), %%mm2 \n\t"\ + "movq 2(%0), %%mm4 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "movq %%mm4, %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + 
"punpcklbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm4, %%mm2 \n\t"\ + "paddw %%mm3, %%mm5 \n\t"\ + "psubw %%mm2, %%mm0 \n\t"\ + "psubw %%mm5, %%mm1 \n\t"\ + "pmullw %%mm6, %%mm0 \n\t"\ + "pmullw %%mm6, %%mm1 \n\t"\ + "movd -2(%0), %%mm2 \n\t"\ + "movd 7(%0), %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "movq %5, %%mm5 \n\t"\ + "paddw %%mm5, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm4, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "movq (%2), %%mm4 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + PAVGB" %%mm4, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm5, q)\ + "add %4, %0 \n\t"\ + "add %4, %1 \n\t"\ + "add %3, %2 \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2)\ + : "D"((long)src2Stride), "S"((long)dstStride),\ + "m"(ff_pw_16)\ + : "memory"\ + );\ + }while(--h);\ +}\ +\ +static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + int w= 2;\ + src -= 2*srcStride;\ + \ + while(w--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + 
QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + if(h==16){\ + asm volatile(\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + }\ + src += 4-(h+5)*srcStride;\ + dst += 4-h*dstStride;\ + }\ +}\ +static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ + int h = size;\ + int w = (size+8)>>2;\ + src -= 2*srcStride+2;\ + while(w--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\ + QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\ + QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, 
%%mm3, %%mm4, %%mm5, 6*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + if(size==16){\ + asm volatile(\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\ + QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ + QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + }\ + tmp += 4;\ + src += 4 - (size+5)*srcStride;\ + }\ + tmp -= size+8;\ + w = size>>4;\ + do{\ + h = size;\ + asm volatile(\ + "movq %4, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "movq 8(%0), %%mm3 \n\t"\ + "movq 2(%0), %%mm1 \n\t"\ + "movq 10(%0), %%mm4 \n\t"\ + "paddw %%mm4, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "paddw 18(%0), %%mm3 \n\t"\ + "paddw 16(%0), %%mm4 \n\t"\ + "movq 4(%0), %%mm2 \n\t"\ + "movq 12(%0), %%mm5 \n\t"\ + "paddw 6(%0), %%mm2 \n\t"\ + "paddw 14(%0), %%mm5 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"\ + "psubw %%mm4, %%mm3 \n\t"\ + "psraw $2, %%mm0 \n\t"\ + "psraw $2, %%mm3 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"\ + "psubw %%mm4, %%mm3 \n\t"\ + "paddsw %%mm2, %%mm0 \n\t"\ + "paddsw %%mm5, %%mm3 \n\t"\ + "psraw $2, %%mm0 \n\t"\ + "psraw $2, %%mm3 \n\t"\ + "paddw %%mm6, %%mm2 \n\t"\ + "paddw %%mm6, %%mm5 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm5, %%mm3 \n\t"\ + "psraw $6, %%mm0 \n\t"\ + "psraw $6, %%mm3 \n\t"\ + "packuswb %%mm3, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm7, q)\ + "add $48, %0 \n\t"\ + "add %3, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(tmp), "+c"(dst), "+m"(h)\ + : "S"((long)dstStride), "m"(ff_pw_32)\ + : "memory"\ + );\ 
+ tmp += 8 - size*24;\ + dst += 8 - size*dstStride;\ + }while(w--);\ +}\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ +}\ +static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ +}\ +\ +static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ + src += 8*dstStride;\ + dst += 8*dstStride;\ + src2 += 8*src2Stride;\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ +}\ +\ +static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\ +}\ +\ +static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int 
srcStride){\ + OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ +}\ +\ +static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + asm volatile(\ + "movq %5, %%mm6 \n\t"\ + "movq (%1), %%mm0 \n\t"\ + "movq 24(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + "packuswb %%mm1, %%mm1 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + PAVGB" (%0,%3), %%mm1 \n\t"\ + OP(%%mm0, (%2), %%mm4, d)\ + OP(%%mm1, (%2,%4), %%mm5, d)\ + "lea (%0,%3,2), %0 \n\t"\ + "lea (%2,%4,2), %2 \n\t"\ + "movq 48(%1), %%mm0 \n\t"\ + "movq 72(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + "packuswb %%mm1, %%mm1 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + PAVGB" (%0,%3), %%mm1 \n\t"\ + OP(%%mm0, (%2), %%mm4, d)\ + OP(%%mm1, (%2,%4), %%mm5, d)\ + :"+a"(src8), "+c"(src16), "+d"(dst)\ + :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ + :"memory");\ +}\ +static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + asm volatile(\ + "movq %0, %%mm6 \n\t"\ + ::"m"(ff_pw_16)\ + );\ + while(h--){\ + asm volatile(\ + "movq (%1), %%mm0 \n\t"\ + "movq 8(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + OP(%%mm0, (%2), %%mm5, q)\ + ::"a"(src8), "c"(src16), "d"(dst)\ + :"memory");\ + src8 += src8Stride;\ + src16 += 24;\ + dst += dstStride;\ + }\ +}\ +static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, 
src8Stride, h);\ + OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\ +}\ + + +#define H264_MC(OPNAME, SIZE, MMX) \ +static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _mmx(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, 
stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4];\ + int16_t * const tmp= (int16_t*)temp;\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + uint8_t * const halfHV= (uint8_t*)temp;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t 
temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + uint8_t * const halfHV= (uint8_t*)temp;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ + uint8_t * const halfHV= ((uint8_t*)temp);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ + uint8_t * const halfHV= ((uint8_t*)temp);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ +}\ + + +#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" +#define AVG_3DNOW_OP(a,b,temp, size) \ +"mov" #size " " #b ", " #temp " \n\t"\ +"pavgusb " #temp ", " #a " \n\t"\ +"mov" #size " " #a ", " #b " \n\t" +#define AVG_MMX2_OP(a,b,temp, size) \ +"mov" #size " " #b ", " #temp " \n\t"\ +"pavgb " #temp ", " #a " \n\t"\ +"mov" #size " " #a ", " #b " \n\t" + +#define PAVGB "pavgusb" +QPEL_H264(put_, PUT_OP, 3dnow) +QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) +#undef PAVGB +#define PAVGB "pavgb" +QPEL_H264(put_, PUT_OP, mmx2) +QPEL_H264(avg_, AVG_MMX2_OP, mmx2) +#undef PAVGB + +H264_MC(put_, 4, 3dnow) +H264_MC(put_, 8, 3dnow) +H264_MC(put_, 16,3dnow) +H264_MC(avg_, 4, 3dnow) +H264_MC(avg_, 8, 3dnow) +H264_MC(avg_, 16,3dnow) +H264_MC(put_, 4, mmx2) +H264_MC(put_, 
8, mmx2) +H264_MC(put_, 16,mmx2) +H264_MC(avg_, 4, mmx2) +H264_MC(avg_, 8, mmx2) +H264_MC(avg_, 16,mmx2) + + +#define H264_CHROMA_OP(S,D) +#define H264_CHROMA_OP4(S,D,T) +#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx +#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx +#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2 +#define H264_CHROMA_MC8_MV0 put_pixels8_mmx +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_OP4 +#undef H264_CHROMA_MC8_TMPL +#undef H264_CHROMA_MC4_TMPL +#undef H264_CHROMA_MC2_TMPL +#undef H264_CHROMA_MC8_MV0 + +#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" +#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ + "pavgb " #T ", " #D " \n\t" +#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2 +#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2 +#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2 +#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2 +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_OP4 +#undef H264_CHROMA_MC8_TMPL +#undef H264_CHROMA_MC4_TMPL +#undef H264_CHROMA_MC2_TMPL +#undef H264_CHROMA_MC8_MV0 + +#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" +#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\ + "pavgusb " #T ", " #D " \n\t" +#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow +#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow +#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow +#include "dsputil_h264_template_mmx.c" +#undef H264_CHROMA_OP +#undef H264_CHROMA_OP4 +#undef H264_CHROMA_MC8_TMPL +#undef H264_CHROMA_MC4_TMPL +#undef H264_CHROMA_MC8_MV0 + +/***********************************/ +/* weighted prediction */ + +static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h) +{ + int x, y; + offset <<= log2_denom; + offset += (1 << log2_denom) >> 1; + asm volatile( + "movd %0, %%mm4 \n\t" + "movd %1, %%mm5 \n\t" + "movd %2, %%mm6 \n\t" + "pshufw $0, 
%%mm4, %%mm4 \n\t" + "pshufw $0, %%mm5, %%mm5 \n\t" + "pxor %%mm7, %%mm7 \n\t" + :: "g"(weight), "g"(offset), "g"(log2_denom) + ); + for(y=0; y + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "common.h" +#include "../dsputil.h" + +#include "mmx.h" + +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ (align))) + +#define ROW_SHIFT 11 +#define COL_SHIFT 6 + +#define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; +} +#endif + + +/* MMXEXT row IDCT */ + +#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ + c4, c6, c4, c6, \ + c1, c3, -c1, -c5, \ + c5, c7, c3, -c7, \ + c4, -c6, c4, -c6, \ + -c4, c2, c4, -c2, \ + c5, -c1, c3, -c1, \ + c7, c3, c7, -c5 } + +static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2r 
(mm5, mm6); // mm6 = x7 x5 x3 x1 + + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 +} + +static inline void mmxext_row (const int16_t * table, const int32_t * rounder) +{ + movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 + pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 + + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 + pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 + + movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 + + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 + + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + + pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + + pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 + + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder + + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder +} + +static inline void mmxext_row_tail (int16_t * row, int store) +{ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + + /* slot */ + + movq_r2m (mm4, 
*(row+store+4)); // save y7 y6 y5 y4 +} + +static inline void mmxext_row_mid (int16_t * row, int store, + int offset, const int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 + + pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 +} + + +/* MMX row IDCT */ + +#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, c6, -c4, -c2, \ + c1, c3, c3, -c7, \ + c5, c7, -c1, -c5, \ + c4, -c6, c4, -c2, \ + -c4, c2, c4, -c6, \ + c5, -c1, c7, -c5, \ + c7, c3, c3, -c1 } + +static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 +} + +static inline void mmx_row (const int16_t * table, const int32_t * rounder) +{ + pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 + punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 + + pmaddwd_m2r (*(table+16), mm0); // mm0 = 
C4*x0-C2*x2 C4*x0-C6*x2 + punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 + + movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 + + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 + + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + + pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + + pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 + + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder + + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder +} + +static inline void mmx_row_tail (int16_t * row, int store) +{ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 + + pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 + + psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 + + por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 +} + +static inline void mmx_row_mid (int16_t * row, int store, + int offset, const int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm7); // 
mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 + + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 + + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 + + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 +} + + +#if 0 +// C column IDCT - its just here to document the MMXEXT and MMX versions +static inline void idct_col (int16_t * col, int offset) +{ +/* multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? 
-32768 : (x)) + + int16_t x0, x1, x2, x3, x4, x5, x6, x7; + int16_t y0, y1, y2, y3, y4, y5, y6, y7; + int16_t a0, a1, a2, a3, b0, b1, b2, b3; + int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; + + col += offset; + + x0 = col[0*8]; + x1 = col[1*8]; + x2 = col[2*8]; + x3 = col[3*8]; + x4 = col[4*8]; + x5 = col[5*8]; + x6 = col[6*8]; + x7 = col[7*8]; + + u04 = S (x0 + x4); + v04 = S (x0 - x4); + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); + + a0 = S (u04 + u26); + a1 = S (v04 + v26); + a2 = S (v04 - v26); + a3 = S (u04 - u26); + + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); + + b0 = S (u17 + u35); + b3 = S (v17 - v35); + u12 = S (u17 - u35); + v12 = S (v17 + v35); + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); + b1 = S (u12 + v12); + b2 = S (u12 - v12); + + y0 = S (a0 + b0) >> COL_SHIFT; + y1 = S (a1 + b1) >> COL_SHIFT; + y2 = S (a2 + b2) >> COL_SHIFT; + y3 = S (a3 + b3) >> COL_SHIFT; + + y4 = S (a3 - b3) >> COL_SHIFT; + y5 = S (a2 - b2) >> COL_SHIFT; + y6 = S (a1 - b1) >> COL_SHIFT; + y7 = S (a0 - b0) >> COL_SHIFT; + + col[0*8] = y0; + col[1*8] = y1; + col[2*8] = y2; + col[3*8] = y3; + col[4*8] = y4; + col[5*8] = y5; + col[6*8] = y6; + col[7*8] = y7; +} +#endif + + +// MMX column IDCT +static inline void idct_col (int16_t * col, int offset) +{ +#define T1 13036 +#define T2 27146 +#define T3 43790 +#define C4 23170 + + static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); // mm0 = T1 + + movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 + movq_r2r (mm0, mm2); // mm2 = T1 + + movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 + pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 + + movq_m2r 
(*_T3, mm5); // mm5 = T3 + pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 + + movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 + movq_r2r (mm5, mm7); // mm7 = T3-1 + + movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 + psubsw_r2r (mm4, mm0); // mm0 = v17 + + movq_m2r (*_T2, mm4); // mm4 = T2 + pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 + + paddsw_r2r (mm2, mm1); // mm1 = u17 + pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 + + /* slot */ + + movq_r2r (mm4, mm2); // mm2 = T2 + paddsw_r2r (mm3, mm5); // mm5 = T3*x3 + + pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 + paddsw_r2r (mm6, mm7); // mm7 = T3*x5 + + psubsw_r2r (mm6, mm5); // mm5 = v35 + paddsw_r2r (mm3, mm7); // mm7 = u35 + + movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 + movq_r2r (mm0, mm6); // mm6 = v17 + + pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 + psubsw_r2r (mm5, mm0); // mm0 = b3 + + psubsw_r2r (mm3, mm4); // mm4 = v26 + paddsw_r2r (mm6, mm5); // mm5 = v12 + + movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 + movq_r2r (mm1, mm6); // mm6 = u17 + + paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 + paddsw_r2r (mm7, mm6); // mm6 = b0 + + psubsw_r2r (mm7, mm1); // mm1 = u12 + movq_r2r (mm1, mm7); // mm7 = u12 + + movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 + paddsw_r2r (mm5, mm1); // mm1 = u12+v12 + + movq_m2r (*_C4, mm0); // mm0 = C4/2 + psubsw_r2r (mm5, mm7); // mm7 = u12-v12 + + movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 + pmulhw_r2r (mm0, mm1); // mm1 = b1/2 + + movq_r2r (mm4, mm6); // mm6 = v26 + pmulhw_r2r (mm0, mm7); // mm7 = b2/2 + + movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 + movq_r2r (mm3, mm0); // mm0 = x0 + + psubsw_r2r (mm5, mm3); // mm3 = v04 + paddsw_r2r (mm5, mm0); // mm0 = u04 + + paddsw_r2r (mm3, mm4); // mm4 = a1 + movq_r2r (mm0, mm5); // mm5 = u04 + + psubsw_r2r (mm6, mm3); // mm3 = a2 + paddsw_r2r (mm2, mm5); // mm5 = a0 + + paddsw_r2r (mm1, mm1); // mm1 = b1 + psubsw_r2r (mm2, mm0); // mm0 = a3 + + paddsw_r2r (mm7, mm7); // mm7 = b2 + movq_r2r (mm3, mm2); // mm2 = a2 
+ + movq_r2r (mm4, mm6); // mm6 = a1 + paddsw_r2r (mm7, mm3); // mm3 = a2+b2 + + psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 + paddsw_r2r (mm1, mm4); // mm4 = a1+b1 + + psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 + psubsw_r2r (mm1, mm6); // mm6 = a1-b1 + + movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 + psubsw_r2r (mm7, mm2); // mm2 = a2-b2 + + psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 + movq_r2r (mm5, mm7); // mm7 = a0 + + movq_r2m (mm4, *(col+offset+1*8)); // save y1 + psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 + + movq_r2m (mm3, *(col+offset+2*8)); // save y2 + paddsw_r2r (mm1, mm5); // mm5 = a0+b0 + + movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 + psubsw_r2r (mm1, mm7); // mm7 = a0-b0 + + psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 + movq_r2r (mm0, mm3); // mm3 = a3 + + movq_r2m (mm2, *(col+offset+5*8)); // save y5 + psubsw_r2r (mm4, mm3); // mm3 = a3-b3 + + psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 + paddsw_r2r (mm0, mm4); // mm4 = a3+b3 + + movq_r2m (mm5, *(col+offset+0*8)); // save y0 + psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 + + movq_r2m (mm6, *(col+offset+6*8)); // save y6 + psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 + + movq_r2m (mm7, *(col+offset+7*8)); // save y7 + + movq_r2m (mm3, *(col+offset+4*8)); // save y4 + + movq_r2m (mm4, *(col+offset+3*8)); // save y3 + +#undef T1 +#undef T2 +#undef T3 +#undef C4 +} + +static const int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static const int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static const int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static const int32_t rounder2[] ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static const int32_t rounder6[] ATTR_ALIGN(8) = + rounder (-0.25); /* C2 * (C6-C2)/2 */ +static const int32_t rounder3[] ATTR_ALIGN(8) = + rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ +static const int32_t rounder5[] 
ATTR_ALIGN(8) = + rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ + +#undef COL_SHIFT +#undef ROW_SHIFT + +#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ +void idct (int16_t * block) \ +{ \ + static const int16_t table04[] ATTR_ALIGN(16) = \ + table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ + static const int16_t table17[] ATTR_ALIGN(16) = \ + table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ + static const int16_t table26[] ATTR_ALIGN(16) = \ + table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ + static const int16_t table35[] ATTR_ALIGN(16) = \ + table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ + \ + idct_row_head (block, 0*8, table04); \ + idct_row (table04, rounder0); \ + idct_row_mid (block, 0*8, 4*8, table04); \ + idct_row (table04, rounder4); \ + idct_row_mid (block, 4*8, 1*8, table17); \ + idct_row (table17, rounder1); \ + idct_row_mid (block, 1*8, 7*8, table17); \ + idct_row (table17, rounder7); \ + idct_row_mid (block, 7*8, 2*8, table26); \ + idct_row (table26, rounder2); \ + idct_row_mid (block, 2*8, 6*8, table26); \ + idct_row (table26, rounder6); \ + idct_row_mid (block, 6*8, 3*8, table35); \ + idct_row (table35, rounder3); \ + idct_row_mid (block, 3*8, 5*8, table35); \ + idct_row (table35, rounder5); \ + idct_row_tail (block, 5*8); \ + \ + idct_col (block, 0); \ + idct_col (block, 4); \ +} + +void ff_mmx_idct(DCTELEM *block); +void ff_mmxext_idct(DCTELEM *block); + +declare_idct (ff_mmxext_idct, mmxext_table, + mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) + +declare_idct (ff_mmx_idct, mmx_table, + mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) + diff --git a/mpeg4/src/libavcodec/i386/idct_mmx_xvid.c b/mpeg4/src/libavcodec/i386/idct_mmx_xvid.c new file mode 100644 index 0000000000000000000000000000000000000000..ce5f0d436b1084754cee3eac917f34dc215663c6 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/idct_mmx_xvid.c @@ -0,0 +1,534 @@ 
+///**************************************************************************** +// * +// * XVID MPEG-4 VIDEO CODEC +// * - MMX and XMM forward discrete cosine transform - +// * +// * Copyright(C) 2001 Peter Ross +// * +// * This program is free software; you can redistribute it and/or modify it +// * under the terms of the GNU General Public License as published by +// * the Free Software Foundation; either version 2 of the License, or +// * (at your option) any later version. +// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU General Public License for more details. +// * +// * You should have received a copy of the GNU General Public License +// * along with this program; if not, write to the Free Software Foundation, +// * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +// +// * +// * $Id: idct_mmx_xvid.c,v 1.4 2006/01/12 22:43:18 diego Exp $ +// * +// ***************************************************************************/ + +// **************************************************************************** +// +// Originally provided by Intel at AP-922 +// http://developer.intel.com/vtune/cbts/strmsimd/922down.htm +// (See more app notes at http://developer.intel.com/vtune/cbts/strmsimd/appnotes.htm) +// but in a limited edition. +// New macro implements a column part for precise iDCT +// The routine precision now satisfies IEEE standard 1180-1990. 
+// +// Copyright(C) 2000-2001 Peter Gubanov +// Rounding trick Copyright(C) 2000 Michel Lespinasse +// +// http://www.elecard.com/peter/idct.html +// http://www.linuxvideo.org/mpeg2dec/ +// +// ***************************************************************************/ +// +// These examples contain code fragments for first stage iDCT 8x8 +// (for rows) and first stage DCT 8x8 (for columns) +// + +// conversion to gcc syntax by michael niedermayer + + +#include +#include "../avcodec.h" + +//============================================================================= +// Macros and other preprocessor constants +//============================================================================= + +#define BITS_INV_ACC 5 // 4 or 5 for IEEE +#define SHIFT_INV_ROW (16 - BITS_INV_ACC) //11 +#define SHIFT_INV_COL (1 + BITS_INV_ACC) //6 +#define RND_INV_ROW (1024 * (6 - BITS_INV_ACC)) +#define RND_INV_COL (16 * (BITS_INV_ACC - 3)) +#define RND_INV_CORR (RND_INV_COL - 1) + +#define BITS_FRW_ACC 3 // 2 or 3 for accuracy +#define SHIFT_FRW_COL BITS_FRW_ACC +#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) +#define RND_FRW_ROW (262144*(BITS_FRW_ACC - 1)) + + +//----------------------------------------------------------------------------- +// Various memory constants (trigonometric values or rounding values) +//----------------------------------------------------------------------------- + + +static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = { + 13036,13036,13036,13036, // tg * (2<<16) + 0.5 + 27146,27146,27146,27146, // tg * (2<<16) + 0.5 + -21746,-21746,-21746,-21746, // tg * (2<<16) + 0.5 + 23170,23170,23170,23170}; // cos * (2<<15) + 0.5 + +static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = { + 65536,65536, + 3597,3597, + 2260,2260, + 1203,1203, + 0,0, + 120,120, + 512,512, + 512,512}; + +//----------------------------------------------------------------------------- +// +// The first stage iDCT 8x8 - inverse DCTs of rows 
+// +//----------------------------------------------------------------------------- +// The 8-point inverse DCT direct algorithm +//----------------------------------------------------------------------------- +// +// static const short w[32] = { +// FIX(cos_4_16), FIX(cos_2_16), FIX(cos_4_16), FIX(cos_6_16), +// FIX(cos_4_16), FIX(cos_6_16), -FIX(cos_4_16), -FIX(cos_2_16), +// FIX(cos_4_16), -FIX(cos_6_16), -FIX(cos_4_16), FIX(cos_2_16), +// FIX(cos_4_16), -FIX(cos_2_16), FIX(cos_4_16), -FIX(cos_6_16), +// FIX(cos_1_16), FIX(cos_3_16), FIX(cos_5_16), FIX(cos_7_16), +// FIX(cos_3_16), -FIX(cos_7_16), -FIX(cos_1_16), -FIX(cos_5_16), +// FIX(cos_5_16), -FIX(cos_1_16), FIX(cos_7_16), FIX(cos_3_16), +// FIX(cos_7_16), -FIX(cos_5_16), FIX(cos_3_16), -FIX(cos_1_16) }; +// +// #define DCT_8_INV_ROW(x, y) +// { +// int a0, a1, a2, a3, b0, b1, b2, b3; +// +// a0 =x[0]*w[0]+x[2]*w[1]+x[4]*w[2]+x[6]*w[3]; +// a1 =x[0]*w[4]+x[2]*w[5]+x[4]*w[6]+x[6]*w[7]; +// a2 = x[0] * w[ 8] + x[2] * w[ 9] + x[4] * w[10] + x[6] * w[11]; +// a3 = x[0] * w[12] + x[2] * w[13] + x[4] * w[14] + x[6] * w[15]; +// b0 = x[1] * w[16] + x[3] * w[17] + x[5] * w[18] + x[7] * w[19]; +// b1 = x[1] * w[20] + x[3] * w[21] + x[5] * w[22] + x[7] * w[23]; +// b2 = x[1] * w[24] + x[3] * w[25] + x[5] * w[26] + x[7] * w[27]; +// b3 = x[1] * w[28] + x[3] * w[29] + x[5] * w[30] + x[7] * w[31]; +// +// y[0] = SHIFT_ROUND ( a0 + b0 ); +// y[1] = SHIFT_ROUND ( a1 + b1 ); +// y[2] = SHIFT_ROUND ( a2 + b2 ); +// y[3] = SHIFT_ROUND ( a3 + b3 ); +// y[4] = SHIFT_ROUND ( a3 - b3 ); +// y[5] = SHIFT_ROUND ( a2 - b2 ); +// y[6] = SHIFT_ROUND ( a1 - b1 ); +// y[7] = SHIFT_ROUND ( a0 - b0 ); +// } +// +//----------------------------------------------------------------------------- +// +// In this implementation the outputs of the iDCT-1D are multiplied +// for rows 0,4 - by cos_4_16, +// for rows 1,7 - by cos_1_16, +// for rows 2,6 - by cos_2_16, +// for rows 3,5 - by cos_3_16 +// and are shifted to the left for better 
// accuracy
//
// For the constants used,
// FIX(float_const) = (short) (float_const * (1<<15) + 0.5)
//
//-----------------------------------------------------------------------------

//-----------------------------------------------------------------------------
// Tables for mmx processors
//-----------------------------------------------------------------------------

// Row-pass coefficients for DCT_8_INV_ROW_MMX: four sub-tables of 32 int16
// (64 bytes each). The caller selects a sub-table with a byte offset of
// 64*k. Each line below is one 8-byte movq/pmaddwd operand; the trailing
// comment names the w[] entries it holds, most significant word first.

// Table for rows 0,4 - constants are multiplied by cos_4_16
static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
    16384,16384,16384,-16384,       // movq-> w06 w04 w02 w00
    21407,8867,8867,-21407,         // w07 w05 w03 w01
    16384,-16384,16384,16384,       // w14 w12 w10 w08
    -8867,21407,-21407,-8867,       // w15 w13 w11 w09
    22725,12873,19266,-22725,       // w22 w20 w18 w16
    19266,4520,-4520,-12873,        // w23 w21 w19 w17
    12873,4520,4520,19266,          // w30 w28 w26 w24
    -22725,19266,-12873,-22725,     // w31 w29 w27 w25
// Table for rows 1,7 - constants are multiplied by cos_1_16
    22725,22725,22725,-22725,       // movq-> w06 w04 w02 w00
    29692,12299,12299,-29692,       // w07 w05 w03 w01
    22725,-22725,22725,22725,       // w14 w12 w10 w08
    -12299,29692,-29692,-12299,     // w15 w13 w11 w09
    31521,17855,26722,-31521,       // w22 w20 w18 w16
    26722,6270,-6270,-17855,        // w23 w21 w19 w17
    17855,6270,6270,26722,          // w30 w28 w26 w24
    -31521,26722,-17855,-31521,     // w31 w29 w27 w25
// Table for rows 2,6 - constants are multiplied by cos_2_16
    21407,21407,21407,-21407,       // movq-> w06 w04 w02 w00
    27969,11585,11585,-27969,       // w07 w05 w03 w01
    21407,-21407,21407,21407,       // w14 w12 w10 w08
    -11585,27969,-27969,-11585,     // w15 w13 w11 w09
    29692,16819,25172,-29692,       // w22 w20 w18 w16
    25172,5906,-5906,-16819,        // w23 w21 w19 w17
    16819,5906,5906,25172,          // w30 w28 w26 w24
    -29692,25172,-16819,-29692,     // w31 w29 w27 w25
// Table for rows 3,5 - constants are multiplied by cos_3_16
    19266,19266,19266,-19266,       // movq-> w06 w04 w02 w00
    25172,10426,10426,-25172,       // w07 w05 w03 w01
    19266,-19266,19266,19266,       // w14 w12 w10 w08
    -10426,25172,-25172,-10426,     // w15 w13 w11 w09
    26722,15137,22654,-26722,       // w22 w20 w18 w16
    22654,5315,-5315,-15137,        // w23 w21 w19 w17
    15137,5315,5315,22654,          // w30 w28 w26 w24
    -26722,22654,-15137,-26722,     // w31 w29 w27 w25
};
//-----------------------------------------------------------------------------
// Tables for xmm processors
//-----------------------------------------------------------------------------

// Same coefficients as tab_i_04_mmx, reordered to match the lane order that
// DCT_8_INV_ROW_XMM produces with pshufw (see the w[] indices per line).

// Table for rows 0,4 - constants are multiplied by cos_4_16
static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
    16384,21407,16384,8867,         // movq-> w05 w04 w01 w00
    16384,8867,-16384,-21407,       // w07 w06 w03 w02
    16384,-8867,16384,-21407,       // w13 w12 w09 w08
    -16384,21407,16384,-8867,       // w15 w14 w11 w10
    22725,19266,19266,-4520,        // w21 w20 w17 w16
    12873,4520,-22725,-12873,       // w23 w22 w19 w18
    12873,-22725,4520,-12873,       // w29 w28 w25 w24
    4520,19266,19266,-22725,        // w31 w30 w27 w26
// Table for rows 1,7 - constants are multiplied by cos_1_16
    22725,29692,22725,12299,        // movq-> w05 w04 w01 w00
    22725,12299,-22725,-29692,      // w07 w06 w03 w02
    22725,-12299,22725,-29692,      // w13 w12 w09 w08
    -22725,29692,22725,-12299,      // w15 w14 w11 w10
    31521,26722,26722,-6270,        // w21 w20 w17 w16
    17855,6270,-31521,-17855,       // w23 w22 w19 w18
    17855,-31521,6270,-17855,       // w29 w28 w25 w24
    6270,26722,26722,-31521,        // w31 w30 w27 w26
// Table for rows 2,6 - constants are multiplied by cos_2_16
    21407,27969,21407,11585,        // movq-> w05 w04 w01 w00
    21407,11585,-21407,-27969,      // w07 w06 w03 w02
    21407,-11585,21407,-27969,      // w13 w12 w09 w08
    -21407,27969,21407,-11585,      // w15 w14 w11 w10
    29692,25172,25172,-5906,        // w21 w20 w17 w16
    16819,5906,-29692,-16819,       // w23 w22 w19 w18
    16819,-29692,5906,-16819,       // w29 w28 w25 w24
    5906,25172,25172,-29692,        // w31 w30 w27 w26
// Table for rows 3,5 - constants are multiplied by cos_3_16
    19266,25172,19266,10426,        // movq-> w05 w04 w01 w00
    19266,10426,-19266,-25172,      // w07 w06 w03 w02
    19266,-10426,19266,-25172,      // w13 w12 w09 w08
    -19266,25172,19266,-10426,      // w15 w14 w11 w10
    26722,22654,22654,-5315,        // w21 w20 w17 w16
    15137,5315,-26722,-15137,       // w23 w22 w19 w18
    15137,-26722,5315,-15137,       // w29 w28 w25 w24
    5315,22654,22654,-26722,        // w31 w30 w27 w26
};
//=============================================================================
// Helper macros for the code
//=============================================================================

//-----------------------------------------------------------------------------
// DCT_8_INV_ROW_MMX( INP, OUT, TABLE, ROUNDER )
//-----------------------------------------------------------------------------
// Expands to a run of asm string literals, to be pasted inside an asm
// statement, that transforms one row of eight int16 values.
//   A1 = memory operand of the input row    (reads A1 and 8+A1)
//   A2 = memory operand of the output row   (writes A2 and 8+A2)
//   A3 = memory operand of a 64-byte coefficient sub-table (tab_i_04_mmx layout)
//   A4 = memory operand of the 64-bit rounder added to the even sums
// The sums are shifted right arithmetically by 11 and packed with signed
// saturation (packssdw). All of mm0-mm7 are overwritten.
// The macro ends in a line continuation, so the blank line after the last
// instruction is part of the definition and must be kept.

#define DCT_8_INV_ROW_MMX(A1,A2,A3,A4)\
    "movq " #A1 ",%%mm0 \n\t"/* 0 ; x3 x2 x1 x0*/\
    "movq 8+" #A1 ",%%mm1 \n\t"/* 1 ; x7 x6 x5 x4*/\
    "movq %%mm0,%%mm2 \n\t"/* 2 ; x3 x2 x1 x0*/\
    "movq " #A3 ",%%mm3 \n\t"/* 3 ; w06 w04 w02 w00*/\
    "punpcklwd %%mm1,%%mm0 \n\t"/* x5 x1 x4 x0*/\
    "movq %%mm0,%%mm5 \n\t"/* 5 ; x5 x1 x4 x0*/\
    "punpckldq %%mm0,%%mm0 \n\t"/* x4 x0 x4 x0*/\
    "movq 8+" #A3 ",%%mm4 \n\t"/* 4 ; w07 w05 w03 w01*/\
    "punpckhwd %%mm1,%%mm2 \n\t"/* 1 ; x7 x3 x6 x2*/\
    "pmaddwd %%mm0,%%mm3 \n\t"/* x4*w06+x0*w04 x4*w02+x0*w00*/\
    "movq %%mm2,%%mm6 \n\t"/* 6 ; x7 x3 x6 x2*/\
    "movq 32+" #A3 ",%%mm1 \n\t"/* 1 ; w22 w20 w18 w16*/\
    "punpckldq %%mm2,%%mm2 \n\t"/* x6 x2 x6 x2*/\
    "pmaddwd %%mm2,%%mm4 \n\t"/* x6*w07+x2*w05 x6*w03+x2*w01*/\
    "punpckhdq %%mm5,%%mm5 \n\t"/* x5 x1 x5 x1*/\
    "pmaddwd 16+" #A3 ",%%mm0 \n\t"/* x4*w14+x0*w12 x4*w10+x0*w08*/\
    "punpckhdq %%mm6,%%mm6 \n\t"/* x7 x3 x7 x3*/\
    "movq 40+" #A3 ",%%mm7 \n\t"/* 7 ; w23 w21 w19 w17*/\
    "pmaddwd %%mm5,%%mm1 \n\t"/* x5*w22+x1*w20 x5*w18+x1*w16*/\
    "paddd " #A4 ",%%mm3 \n\t"/* +rounder (A4)*/\
    "pmaddwd %%mm6,%%mm7 \n\t"/* x7*w23+x3*w21 x7*w19+x3*w17*/\
    "pmaddwd 24+" #A3 ",%%mm2 \n\t"/* x6*w15+x2*w13 x6*w11+x2*w09*/\
    "paddd %%mm4,%%mm3 \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\
    "pmaddwd 48+" #A3 ",%%mm5 \n\t"/* x5*w30+x1*w28 x5*w26+x1*w24*/\
    "movq %%mm3,%%mm4 \n\t"/* 4 ; a1 a0*/\
    "pmaddwd 56+" #A3 ",%%mm6 \n\t"/* x7*w31+x3*w29 x7*w27+x3*w25*/\
    "paddd %%mm7,%%mm1 \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\
    "paddd " #A4 ",%%mm0 \n\t"/* +rounder (A4)*/\
    "psubd %%mm1,%%mm3 \n\t"/* a1-b1 a0-b0*/\
    "psrad $11,%%mm3 \n\t"/* y6=a1-b1 y7=a0-b0*/\
    "paddd %%mm4,%%mm1 \n\t"/* 4 ; a1+b1 a0+b0*/\
    "paddd %%mm2,%%mm0 \n\t"/* 2 ; a3=sum(even3) a2=sum(even2)*/\
    "psrad $11,%%mm1 \n\t"/* y1=a1+b1 y0=a0+b0*/\
    "paddd %%mm6,%%mm5 \n\t"/* 6 ; b3=sum(odd3) b2=sum(odd2)*/\
    "movq %%mm0,%%mm4 \n\t"/* 4 ; a3 a2*/\
    "paddd %%mm5,%%mm0 \n\t"/* a3+b3 a2+b2*/\
    "psubd %%mm5,%%mm4 \n\t"/* 5 ; a3-b3 a2-b2*/\
    "psrad $11,%%mm0 \n\t"/* y3=a3+b3 y2=a2+b2*/\
    "psrad $11,%%mm4 \n\t"/* y4=a3-b3 y5=a2-b2*/\
    "packssdw %%mm0,%%mm1 \n\t"/* 0 ; y3 y2 y1 y0*/\
    "packssdw %%mm3,%%mm4 \n\t"/* 3 ; y6 y7 y4 y5*/\
    "movq %%mm4,%%mm7 \n\t"/* 7 ; y6 y7 y4 y5*/\
    "psrld $16,%%mm4 \n\t"/* 0 y6 0 y4*/\
    "pslld $16,%%mm7 \n\t"/* y7 0 y5 0*/\
    "movq %%mm1," #A2 " \n\t"/* 1 ; save y3 y2 y1 y0*/\
    "por %%mm4,%%mm7 \n\t"/* 4 ; y7 y6 y5 y4*/\
    "movq %%mm7,8 +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\


//-----------------------------------------------------------------------------
// DCT_8_INV_ROW_XMM( INP, OUT, TABLE, ROUNDER )
//-----------------------------------------------------------------------------
// Same contract as DCT_8_INV_ROW_MMX (A1 input row, A2 output row, A3
// coefficient sub-table, A4 rounder; shift by 11; mm0-mm7 overwritten), but
// the lanes are rearranged with pshufw instead of punpck* sequences, so A3
// must use the tab_i_04_xmm layout. pshufw is an MMX-extension instruction,
// not baseline MMX. As above, the trailing continuation plus blank line is
// part of the macro.

#define DCT_8_INV_ROW_XMM(A1,A2,A3,A4)\
    "movq " #A1 ",%%mm0 \n\t"/* 0 ; x3 x2 x1 x0*/\
    "movq 8+" #A1 ",%%mm1 \n\t"/* 1 ; x7 x6 x5 x4*/\
    "movq %%mm0,%%mm2 \n\t"/* 2 ; x3 x2 x1 x0*/\
    "movq " #A3 ",%%mm3 \n\t"/* 3 ; w05 w04 w01 w00*/\
    "pshufw $0b10001000,%%mm0,%%mm0 \n\t"/* x2 x0 x2 x0*/\
    "movq 8+" #A3 ",%%mm4 \n\t"/* 4 ; w07 w06 w03 w02*/\
    "movq %%mm1,%%mm5 \n\t"/* 5 ; x7 x6 x5 x4*/\
    "pmaddwd %%mm0,%%mm3 \n\t"/* x2*w05+x0*w04 x2*w01+x0*w00*/\
    "movq 32+" #A3 ",%%mm6 \n\t"/* 6 ; w21 w20 w17 w16*/\
    "pshufw $0b10001000,%%mm1,%%mm1 \n\t"/* x6 x4 x6 x4*/\
    "pmaddwd %%mm1,%%mm4 \n\t"/* x6*w07+x4*w06 x6*w03+x4*w02*/\
    "movq 40+" #A3 ",%%mm7 \n\t"/* 7 ; w23 w22 w19 w18*/\
    "pshufw $0b11011101,%%mm2,%%mm2 \n\t"/* x3 x1 x3 x1*/\
    "pmaddwd %%mm2,%%mm6 \n\t"/* x3*w21+x1*w20 x3*w17+x1*w16*/\
    "pshufw $0b11011101,%%mm5,%%mm5 \n\t"/* x7 x5 x7 x5*/\
    "pmaddwd %%mm5,%%mm7 \n\t"/* x7*w23+x5*w22 x7*w19+x5*w18*/\
    "paddd " #A4 ",%%mm3 \n\t"/* +rounder (A4)*/\
    "pmaddwd 16+" #A3 ",%%mm0 \n\t"/* x2*w13+x0*w12 x2*w09+x0*w08*/\
    "paddd %%mm4,%%mm3 \n\t"/* 4 ; a1=sum(even1) a0=sum(even0)*/\
    "pmaddwd 24+" #A3 ",%%mm1 \n\t"/* x6*w15+x4*w14 x6*w11+x4*w10*/\
    "movq %%mm3,%%mm4 \n\t"/* 4 ; a1 a0*/\
    "pmaddwd 48+" #A3 ",%%mm2 \n\t"/* x3*w29+x1*w28 x3*w25+x1*w24*/\
    "paddd %%mm7,%%mm6 \n\t"/* 7 ; b1=sum(odd1) b0=sum(odd0)*/\
    "pmaddwd 56+" #A3 ",%%mm5 \n\t"/* x7*w31+x5*w30 x7*w27+x5*w26*/\
    "paddd %%mm6,%%mm3 \n\t"/* a1+b1 a0+b0*/\
    "paddd " #A4 ",%%mm0 \n\t"/* +rounder (A4)*/\
    "psrad $11,%%mm3 \n\t"/* y1=a1+b1 y0=a0+b0*/\
    "paddd %%mm1,%%mm0 \n\t"/* 1 ; a3=sum(even3) a2=sum(even2)*/\
    "psubd %%mm6,%%mm4 \n\t"/* 6 ; a1-b1 a0-b0*/\
    "movq %%mm0,%%mm7 \n\t"/* 7 ; a3 a2*/\
    "paddd %%mm5,%%mm2 \n\t"/* 5 ; b3=sum(odd3) b2=sum(odd2)*/\
    "paddd %%mm2,%%mm0 \n\t"/* a3+b3 a2+b2*/\
    "psrad $11,%%mm4 \n\t"/* y6=a1-b1 y7=a0-b0*/\
    "psubd %%mm2,%%mm7 \n\t"/* 2 ; a3-b3 a2-b2*/\
    "psrad $11,%%mm0 \n\t"/* y3=a3+b3 y2=a2+b2*/\
    "psrad $11,%%mm7 \n\t"/* y4=a3-b3 y5=a2-b2*/\
    "packssdw %%mm0,%%mm3 \n\t"/* 0 ; y3 y2 y1 y0*/\
    "packssdw %%mm4,%%mm7 \n\t"/* 4 ; y6 y7 y4 y5*/\
    "movq %%mm3, " #A2 " \n\t"/* 3 ; save y3 y2 y1 y0*/\
    "pshufw $0b10110001,%%mm7,%%mm7 \n\t"/* y7 y6 y5 y4*/\
    "movq %%mm7,8 +" #A2 "\n\t"/* 7 ; save y7 y6 y5 y4*/\


//-----------------------------------------------------------------------------
//
// The first stage DCT 8x8 - forward DCTs of columns
//
// The outputs are multiplied
// for rows 0,4 - on cos_4_16,
// for rows 1,7 - on cos_1_16,
// for rows 2,6 - on cos_2_16,
// for rows 3,5 - on cos_3_16
//
and are shifted to the left for rise of accuracy +// +//----------------------------------------------------------------------------- +// +// The 8-point scaled forward DCT algorithm (26a8m) +// +//----------------------------------------------------------------------------- +// +// #define DCT_8_FRW_COL(x, y) +//{ +// short t0, t1, t2, t3, t4, t5, t6, t7; +// short tp03, tm03, tp12, tm12, tp65, tm65; +// short tp465, tm465, tp765, tm765; +// +// t0 = LEFT_SHIFT ( x[0] + x[7] ); +// t1 = LEFT_SHIFT ( x[1] + x[6] ); +// t2 = LEFT_SHIFT ( x[2] + x[5] ); +// t3 = LEFT_SHIFT ( x[3] + x[4] ); +// t4 = LEFT_SHIFT ( x[3] - x[4] ); +// t5 = LEFT_SHIFT ( x[2] - x[5] ); +// t6 = LEFT_SHIFT ( x[1] - x[6] ); +// t7 = LEFT_SHIFT ( x[0] - x[7] ); +// +// tp03 = t0 + t3; +// tm03 = t0 - t3; +// tp12 = t1 + t2; +// tm12 = t1 - t2; +// +// y[0] = tp03 + tp12; +// y[4] = tp03 - tp12; +// +// y[2] = tm03 + tm12 * tg_2_16; +// y[6] = tm03 * tg_2_16 - tm12; +// +// tp65 =(t6 +t5 )*cos_4_16; +// tm65 =(t6 -t5 )*cos_4_16; +// +// tp765 = t7 + tp65; +// tm765 = t7 - tp65; +// tp465 = t4 + tm65; +// tm465 = t4 - tm65; +// +// y[1] = tp765 + tp465 * tg_1_16; +// y[7] = tp765 * tg_1_16 - tp465; +// y[5] = tm765 * tg_3_16 + tm465; +// y[3] = tm765 - tm465 * tg_3_16; +//} +// +//----------------------------------------------------------------------------- + +//----------------------------------------------------------------------------- +// DCT_8_INV_COL_4 INP,OUT +//----------------------------------------------------------------------------- + +#define DCT_8_INV_COL(A1,A2)\ + "movq 2*8(%3),%%mm0\n\t"\ + "movq 16*3+" #A1 ",%%mm3\n\t"\ + "movq %%mm0,%%mm1 \n\t"/* tg_3_16*/\ + "movq 16*5+" #A1 ",%%mm5\n\t"\ + "pmulhw %%mm3,%%mm0 \n\t"/* x3*(tg_3_16-1)*/\ + "movq (%3),%%mm4\n\t"\ + "pmulhw %%mm5,%%mm1 \n\t"/* x5*(tg_3_16-1)*/\ + "movq 16*7+" #A1 ",%%mm7\n\t"\ + "movq %%mm4,%%mm2 \n\t"/* tg_1_16*/\ + "movq 16*1+" #A1 ",%%mm6\n\t"\ + "pmulhw %%mm7,%%mm4 \n\t"/* x7*tg_1_16*/\ + "paddsw %%mm3,%%mm0 
\n\t"/* x3*tg_3_16*/\ + "pmulhw %%mm6,%%mm2 \n\t"/* x1*tg_1_16*/\ + "paddsw %%mm3,%%mm1 \n\t"/* x3+x5*(tg_3_16-1)*/\ + "psubsw %%mm5,%%mm0 \n\t"/* x3*tg_3_16-x5 = tm35*/\ + "movq 3*8(%3),%%mm3\n\t"\ + "paddsw %%mm5,%%mm1 \n\t"/* x3+x5*tg_3_16 = tp35*/\ + "paddsw %%mm6,%%mm4 \n\t"/* x1+tg_1_16*x7 = tp17*/\ + "psubsw %%mm7,%%mm2 \n\t"/* x1*tg_1_16-x7 = tm17*/\ + "movq %%mm4,%%mm5 \n\t"/* tp17*/\ + "movq %%mm2,%%mm6 \n\t"/* tm17*/\ + "paddsw %%mm1,%%mm5 \n\t"/* tp17+tp35 = b0*/\ + "psubsw %%mm0,%%mm6 \n\t"/* tm17-tm35 = b3*/\ + "psubsw %%mm1,%%mm4 \n\t"/* tp17-tp35 = t1*/\ + "paddsw %%mm0,%%mm2 \n\t"/* tm17+tm35 = t2*/\ + "movq 1*8(%3),%%mm7\n\t"\ + "movq %%mm4,%%mm1 \n\t"/* t1*/\ + "movq %%mm5,3*16 +" #A2 "\n\t"/* save b0*/\ + "paddsw %%mm2,%%mm1 \n\t"/* t1+t2*/\ + "movq %%mm6,5*16 +" #A2 "\n\t"/* save b3*/\ + "psubsw %%mm2,%%mm4 \n\t"/* t1-t2*/\ + "movq 2*16+" #A1 ",%%mm5\n\t"\ + "movq %%mm7,%%mm0 \n\t"/* tg_2_16*/\ + "movq 6*16+" #A1 ",%%mm6\n\t"\ + "pmulhw %%mm5,%%mm0 \n\t"/* x2*tg_2_16*/\ + "pmulhw %%mm6,%%mm7 \n\t"/* x6*tg_2_16*/\ + "pmulhw %%mm3,%%mm1 \n\t"/* ocos_4_16*(t1+t2) = b1/2*/\ + "movq 0*16+" #A1 ",%%mm2\n\t"\ + "pmulhw %%mm3,%%mm4 \n\t"/* ocos_4_16*(t1-t2) = b2/2*/\ + "psubsw %%mm6,%%mm0 \n\t"/* t2*tg_2_16-x6 = tm26*/\ + "movq %%mm2,%%mm3 \n\t"/* x0*/\ + "movq 4*16+" #A1 ",%%mm6\n\t"\ + "paddsw %%mm5,%%mm7 \n\t"/* x2+x6*tg_2_16 = tp26*/\ + "paddsw %%mm6,%%mm2 \n\t"/* x0+x4 = tp04*/\ + "psubsw %%mm6,%%mm3 \n\t"/* x0-x4 = tm04*/\ + "movq %%mm2,%%mm5 \n\t"/* tp04*/\ + "movq %%mm3,%%mm6 \n\t"/* tm04*/\ + "psubsw %%mm7,%%mm2 \n\t"/* tp04-tp26 = a3*/\ + "paddsw %%mm0,%%mm3 \n\t"/* tm04+tm26 = a1*/\ + "paddsw %%mm1,%%mm1 \n\t"/* b1*/\ + "paddsw %%mm4,%%mm4 \n\t"/* b2*/\ + "paddsw %%mm7,%%mm5 \n\t"/* tp04+tp26 = a0*/\ + "psubsw %%mm0,%%mm6 \n\t"/* tm04-tm26 = a2*/\ + "movq %%mm3,%%mm7 \n\t"/* a1*/\ + "movq %%mm6,%%mm0 \n\t"/* a2*/\ + "paddsw %%mm1,%%mm3 \n\t"/* a1+b1*/\ + "paddsw %%mm4,%%mm6 \n\t"/* a2+b2*/\ + "psraw $6,%%mm3 \n\t"/* dst1*/\ + "psubsw 
%%mm1,%%mm7 \n\t"/* a1-b1*/\ + "psraw $6,%%mm6 \n\t"/* dst2*/\ + "psubsw %%mm4,%%mm0 \n\t"/* a2-b2*/\ + "movq 3*16+" #A2 ",%%mm1 \n\t"/* load b0*/\ + "psraw $6,%%mm7 \n\t"/* dst6*/\ + "movq %%mm5,%%mm4 \n\t"/* a0*/\ + "psraw $6,%%mm0 \n\t"/* dst5*/\ + "movq %%mm3,1*16+" #A2 "\n\t"\ + "paddsw %%mm1,%%mm5 \n\t"/* a0+b0*/\ + "movq %%mm6,2*16+" #A2 "\n\t"\ + "psubsw %%mm1,%%mm4 \n\t"/* a0-b0*/\ + "movq 5*16+" #A2 ",%%mm3 \n\t"/* load b3*/\ + "psraw $6,%%mm5 \n\t"/* dst0*/\ + "movq %%mm2,%%mm6 \n\t"/* a3*/\ + "psraw $6,%%mm4 \n\t"/* dst7*/\ + "movq %%mm0,5*16+" #A2 "\n\t"\ + "paddsw %%mm3,%%mm2 \n\t"/* a3+b3*/\ + "movq %%mm7,6*16+" #A2 "\n\t"\ + "psubsw %%mm3,%%mm6 \n\t"/* a3-b3*/\ + "movq %%mm5,0*16+" #A2 "\n\t"\ + "psraw $6,%%mm2 \n\t"/* dst3*/\ + "movq %%mm4,7*16+" #A2 "\n\t"\ + "psraw $6,%%mm6 \n\t"/* dst4*/\ + "movq %%mm2,3*16+" #A2 "\n\t"\ + "movq %%mm6,4*16+" #A2 "\n\t" + +//============================================================================= +// Code +//============================================================================= + +//----------------------------------------------------------------------------- +// void idct_mmx(uint16_t block[64]); +//----------------------------------------------------------------------------- + + +void ff_idct_xvid_mmx(short *block){ +asm volatile( + //# Process each row + DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) + DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) + DCT_8_INV_ROW_MMX(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1)) + DCT_8_INV_ROW_MMX(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1)) + DCT_8_INV_ROW_MMX(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1)) + DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1)) + DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1)) + DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1)) + + //# Process the columns (4 at a time) + DCT_8_INV_COL(0(%0), 0(%0)) + DCT_8_INV_COL(8(%0), 8(%0)) + :: "r"(block), "r"(rounder_0), "r"(tab_i_04_mmx), "r"(tg_1_16)); +} + 
+//----------------------------------------------------------------------------- +// void idct_xmm(uint16_t block[64]); +//----------------------------------------------------------------------------- + + +void ff_idct_xvid_mmx2(short *block){ +asm volatile( + //# Process each row + DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) + DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) + DCT_8_INV_ROW_XMM(2*16(%0), 2*16(%0), 64*2(%2), 8*2(%1)) + DCT_8_INV_ROW_XMM(3*16(%0), 3*16(%0), 64*3(%2), 8*3(%1)) + DCT_8_INV_ROW_XMM(4*16(%0), 4*16(%0), 64*0(%2), 8*4(%1)) + DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1)) + DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1)) + DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1)) + + //# Process the columns (4 at a time) + DCT_8_INV_COL(0(%0), 0(%0)) + DCT_8_INV_COL(8(%0), 8(%0)) + :: "r"(block), "r"(rounder_0), "r"(tab_i_04_xmm), "r"(tg_1_16)); +} + diff --git a/mpeg4/src/libavcodec/i386/mmx.h b/mpeg4/src/libavcodec/i386/mmx.h new file mode 100644 index 0000000000000000000000000000000000000000..df1791823be4e9da40fdc8359afdf556c5f6c8e9 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/mmx.h @@ -0,0 +1,285 @@ +/* + * mmx.h + * Copyright (C) 1997-2001 H. Dietz and R. 
Fisher + */ +#ifndef AVCODEC_I386MMX_H +#define AVCODEC_I386MMX_H + +#ifdef ARCH_X86_64 +# define REG_a "rax" +# define REG_b "rbx" +# define REG_c "rcx" +# define REG_d "rdx" +# define REG_D "rdi" +# define REG_S "rsi" +# define PTR_SIZE "8" +#else +# define REG_a "eax" +# define REG_b "ebx" +# define REG_c "ecx" +# define REG_d "edx" +# define REG_D "edi" +# define REG_S "esi" +# define PTR_SIZE "4" +#endif + +/* + * The type of an value that fits in an MMX register (note that long + * long constant values MUST be suffixed by LL and unsigned long long + * values by ULL, lest they be truncated by the compiler) + */ + +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} mmx_t; /* On an 8-byte (64-bit) boundary */ + + +#define mmx_i2r(op,imm,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op,mem,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op,reg,mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op,regs,regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + + +#define emms() __asm__ __volatile__ ("emms") + +#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) +#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) +#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) + +#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) +#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) +#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) + +#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) +#define 
packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd) +#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) +#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) + +#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) +#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) + +#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) +#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) +#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) +#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) +#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) +#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) + +#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) +#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) +#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) +#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) + +#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) +#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) +#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) +#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) + +#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) +#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) + +#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) +#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) + +#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) +#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) +#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) + +#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) +#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) +#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, 
reg) +#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) + +#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) +#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) + +#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) +#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) + +#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) +#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) + +#define por_m2r(var,reg) mmx_m2r (por, var, reg) +#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) + +#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) +#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) +#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) +#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) +#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) +#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) +#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) +#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) +#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) + +#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) +#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) +#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) +#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) +#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) +#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) + +#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) +#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) +#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) +#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) +#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) +#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) +#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) +#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) +#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) + +#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) +#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) +#define 
psubd_m2r(var,reg) mmx_m2r (psubd, var, reg) +#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) +#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) +#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) + +#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) +#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) +#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) +#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) + +#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) +#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) +#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) +#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) + +#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) +#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) +#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) +#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) +#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) +#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) + +#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) +#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) +#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) +#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) +#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) +#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) + +#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) +#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) + + +/* 3DNOW extensions */ + +#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) +#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) + + +/* AMD MMX extensions - also available in intel SSE */ + + +#define mmx_m2ri(op,mem,reg,imm) \ + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem), "X" (imm)) +#define mmx_r2ri(op,regs,regd,imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs 
", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define mmx_fetch(mem,hint) \ + __asm__ __volatile__ ("prefetch" #hint " %0" \ + : /* nothing */ \ + : "X" (mem)) + + +#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) + +#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) + +#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) +#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) +#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) +#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) + +#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) + +#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) + +#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) +#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) + +#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) +#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) + +#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) +#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) + +#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) +#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) + +#define pmovmskb(mmreg,reg) \ + __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg) + +#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) +#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) + +#define prefetcht0(mem) mmx_fetch (mem, t0) +#define prefetcht1(mem) mmx_fetch (mem, t1) +#define prefetcht2(mem) mmx_fetch (mem, t2) +#define prefetchnta(mem) mmx_fetch (mem, nta) + +#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) +#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) +#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) + +#define sfence() __asm__ __volatile__ ("sfence\n\t") + +/* SSE2 */ +#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) +#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm) 
+#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm) +#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm) + +#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm) + +#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg) +#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var) +#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd) +#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg) +#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var) +#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd) + +#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var) + +#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg) +#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg) + +#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd) +#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd) + + +#endif /* AVCODEC_I386MMX_H */ diff --git a/mpeg4/src/libavcodec/i386/motion_est_mmx.c b/mpeg4/src/libavcodec/i386/motion_est_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..c14b7938482d2290676951d0c3d5d5e94244273d --- /dev/null +++ b/mpeg4/src/libavcodec/i386/motion_est_mmx.c @@ -0,0 +1,406 @@ +/* + * MMX optimized motion estimation + * Copyright (c) 2001 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * mostly by Michael Niedermayer + */ +#include "../dsputil.h" +#include "mmx.h" + +static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ +0x0000000000000000ULL, +0x0001000100010001ULL, +0x0002000200020002ULL, +}; + +static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; + +static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) +{ + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" + "add %3, %%"REG_a" \n\t" + "psubusb %%mm0, %%mm2 \n\t" + "psubusb %%mm4, %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm5 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm5, %%mm1 \n\t" + "por %%mm2, %%mm0 \n\t" + "por %%mm1, %%mm3 \n\t" + "movq %%mm0, %%mm1 \n\t" + "movq %%mm3, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "add %3, %%"REG_a" \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) + ); +} + +static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +{ + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "add %3, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "psadbw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + 
"add %3, %%"REG_a" \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) + ); +} + +static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) +{ + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "pavgb %%mm2, %%mm0 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "add %4, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "pavgb %%mm1, %%mm3 \n\t" + "movq (%3, %%"REG_a"), %%mm1 \n\t" + "psadbw %%mm1, %%mm3 \n\t" + "paddw %%mm3, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "add %4, %%"REG_a" \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) + ); +} + +static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +{ //FIXME reuse src + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "movq "MANGLE(bone)", %%mm5 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "movq 1(%1, %%"REG_a"), %%mm1 \n\t" + "movq 1(%2, %%"REG_a"), %%mm3 \n\t" + "pavgb %%mm2, %%mm0 \n\t" + "pavgb %%mm1, %%mm3 \n\t" + "psubusb %%mm5, %%mm3 \n\t" + "pavgb %%mm3, %%mm0 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" + "psadbw %%mm2, %%mm0 \n\t" + "add %4, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq 1(%2, %%"REG_a"), %%mm4 \n\t" + "pavgb %%mm3, %%mm1 \n\t" + "pavgb %%mm4, %%mm2 \n\t" + "psubusb %%mm5, %%mm2 \n\t" + "pavgb %%mm1, %%mm2 \n\t" + "movq (%3, %%"REG_a"), %%mm1 \n\t" + "psadbw %%mm1, %%mm2 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "add %4, %%"REG_a" \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) + ); +} + +static inline void 
sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) +{ + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm0, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "movq (%3, %%"REG_a"), %%mm4 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" + "paddw %%mm5, %%mm1 \n\t" + "paddw %%mm5, %%mm3 \n\t" + "psrlw $1, %%mm1 \n\t" + "psrlw $1, %%mm3 \n\t" + "packuswb %%mm3, %%mm1 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm2, %%mm1 \n\t" + "por %%mm4, %%mm1 \n\t" + "movq %%mm1, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm0, %%mm6 \n\t" + "add %4, %%"REG_a" \n\t" + " js 1b \n\t" + : "+a" (len) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) + ); +} + +static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) +{ + long len= -(stride*h); + asm volatile( + ".balign 16 \n\t" + "1: \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm1 \n\t" + "movq %%mm0, %%mm4 \n\t" + "movq %%mm1, %%mm2 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" + "punpcklbw %%mm7, %%mm1 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "punpckhbw %%mm7, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm4 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq 1(%2, %%"REG_a"), %%mm3 \n\t" + "movq %%mm2, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm1 \n\t" + "paddw %%mm0, %%mm2 \n\t" + "paddw %%mm4, %%mm1 \n\t" + "movq %%mm3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm3 \n\t" + "punpckhbw %%mm7, %%mm4 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm4, %%mm1 \n\t" + "movq (%3, %%"REG_a"), %%mm3 \n\t" + "movq (%3, %%"REG_a"), %%mm4 \n\t" + "paddw 
/*
 * Reduce the SAD accumulator in %mm6 to a scalar.
 *
 * The plain-MMX SAD helpers leave four 16-bit partial sums in %mm6. This
 * routine adds the four lanes together and returns the total, which is
 * therefore computed modulo 65536.
 *
 * %mm0 and %mm6 are overwritten; neither is declared as clobbered.
 */
static inline int sum_mmx(void)
{
    int ret;
    asm volatile(
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $32, %%mm6 \n\t"     /* bring lanes 2,3 down to lanes 0,1 */
        "paddw %%mm0, %%mm6 \n\t"   /* lane0 = w0+w2, lane1 = w1+w3 */
        "movq %%mm6, %%mm0 \n\t"
        "psrlq $16, %%mm6 \n\t"     /* bring lane 1 down to lane 0 */
        "paddw %%mm0, %%mm6 \n\t"   /* lane0 = w0+w1+w2+w3 */
        "movd %%mm6, %0 \n\t"       /* copy the low 32 bits out */
        : "=r" (ret)
    );
    /* Only lane 0 holds the total; the upper 16 bits are leftovers. */
    return ret&0xFFFF;
}
%%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[2]) \ + );\ +\ + sad8_4_ ## suf(blk1, blk2, stride, 8);\ +\ + return sum_ ## suf();\ +}\ +\ +static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t":);\ +\ + sad8_1_ ## suf(blk1 , blk2 , stride, h);\ + sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ +\ + return sum_ ## suf();\ +}\ +static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, h);\ + sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\ +\ + return sum_ ## suf();\ +}\ +static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[1]) \ + );\ +\ + sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, h);\ + sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\ +\ + return sum_ ## suf();\ +}\ +static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t"\ + "movq %0, %%mm5 \n\t"\ + :: "m"(round_tab[2]) \ + );\ +\ + sad8_4_ ## suf(blk1 , blk2 , stride, h);\ + sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\ +\ + return sum_ ## suf();\ +}\ + +PIX_SAD(mmx) +PIX_SAD(mmx2) + +void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) +{ + if (mm_flags & MM_MMX) { + c->pix_abs[0][0] = sad16_mmx; + c->pix_abs[0][1] = sad16_x2_mmx; + c->pix_abs[0][2] = sad16_y2_mmx; + c->pix_abs[0][3] = sad16_xy2_mmx; + c->pix_abs[1][0] = sad8_mmx; + c->pix_abs[1][1] = sad8_x2_mmx; + c->pix_abs[1][2] = sad8_y2_mmx; + c->pix_abs[1][3] = sad8_xy2_mmx; + + c->sad[0]= sad16_mmx; + c->sad[1]= sad8_mmx; + } + if 
(mm_flags & MM_MMXEXT) { + c->pix_abs[0][0] = sad16_mmx2; + c->pix_abs[1][0] = sad8_mmx2; + + c->sad[0]= sad16_mmx2; + c->sad[1]= sad8_mmx2; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->pix_abs[0][1] = sad16_x2_mmx2; + c->pix_abs[0][2] = sad16_y2_mmx2; + c->pix_abs[0][3] = sad16_xy2_mmx2; + c->pix_abs[1][1] = sad8_x2_mmx2; + c->pix_abs[1][2] = sad8_y2_mmx2; + c->pix_abs[1][3] = sad8_xy2_mmx2; + } + } +} diff --git a/mpeg4/src/libavcodec/i386/mpegvideo_mmx.c b/mpeg4/src/libavcodec/i386/mpegvideo_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..f83df3a194e6503eef86217df289ebb7201f1e7e --- /dev/null +++ b/mpeg4/src/libavcodec/i386/mpegvideo_mmx.c @@ -0,0 +1,723 @@ +/* + * The simplest mpeg encoder (well, it was the simplest!) + * Copyright (c) 2000,2001 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/*
 * MMX inverse quantisation of one intra block (H.263 rule), in place.
 *
 * For every coefficient the loop touches:
 *     0       stays 0
 *     v > 0   becomes v*qmul + qadd
 *     v < 0   becomes v*qmul - qadd
 * with qmul = 2*qscale. The arithmetic is done on 16-bit lanes and wraps.
 *
 * s:      codec context; h263_aic, ac_pred, y_dc_scale / c_dc_scale,
 *         block_last_index[] and inter_scantable.raster_end[] are read.
 * block:  coefficients, modified in place.
 * n:      block number; n < 4 uses y_dc_scale, otherwise c_dc_scale.
 * qscale: quantiser scale.
 *
 * The DC coefficient does not follow the formula above: its final value is
 * computed in C before the loop (block[0] times the DC scale, or block[0]
 * unchanged when h263_aic is set) and written back over the loop's result.
 * With h263_aic set, qadd is 0.
 *
 * %mm0-%mm7 are overwritten without being declared as clobbered.
 */
static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
{
    long level, qmul, qadd, nCoeffs;

    qmul = qscale << 1;

    assert(s->block_last_index[n]>=0 || s->h263_aic);

    if (!s->h263_aic) {
        if (n < 4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd = (qscale - 1) | 1;
    }else{
        qadd = 0;
        level= block[0];
    }
    /* nCoeffs is the highest coefficient index the loop must reach. */
    if(s->ac_pred)
        nCoeffs=63;
    else
        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
//printf("%d %d ", qmul, qadd);
asm volatile(
        /* broadcast qmul to the four words of mm6 */
        "movd %1, %%mm6 \n\t" //qmul
        "packssdw %%mm6, %%mm6 \n\t"
        "packssdw %%mm6, %%mm6 \n\t"
        /* broadcast qadd to mm5, then mm7 = -qadd in every word */
        "movd %2, %%mm5 \n\t" //qadd
        "pxor %%mm7, %%mm7 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        "packssdw %%mm5, %%mm5 \n\t"
        "psubw %%mm5, %%mm7 \n\t"
        "pxor %%mm4, %%mm4 \n\t"            // mm4 = 0
        ".balign 16 \n\t"
        "1: \n\t"
        /* eight coefficients per iteration */
        "movq (%0, %3), %%mm0 \n\t"
        "movq 8(%0, %3), %%mm1 \n\t"

        "pmullw %%mm6, %%mm0 \n\t"          // block[i]*qmul (low 16 bits)
        "pmullw %%mm6, %%mm1 \n\t"

        "movq (%0, %3), %%mm2 \n\t"
        "movq 8(%0, %3), %%mm3 \n\t"

        "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] > 0 ? -1 : 0
        "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] > 0 ? -1 : 0

        /* Conditional-complement trick: for positive inputs the product x is
           complemented, -qadd is added and the sum is complemented again,
           giving ~(~x - qadd) = x + qadd; for the others the result is
           simply x - qadd. */
        "pxor %%mm2, %%mm0 \n\t"
        "pxor %%mm3, %%mm1 \n\t"

        "paddw %%mm7, %%mm0 \n\t"
        "paddw %%mm7, %%mm1 \n\t"

        "pxor %%mm0, %%mm2 \n\t"            // mm2/mm3 now hold the dequantised values
        "pxor %%mm1, %%mm3 \n\t"

        /* A zero input leaves exactly -qadd in the intermediate, which is
           what the comparison against mm7 detects. */
        "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0

        "pandn %%mm2, %%mm0 \n\t"           // force zero inputs back to zero
        "pandn %%mm3, %%mm1 \n\t"

        "movq %%mm0, (%0, %3) \n\t"
        "movq %%mm1, 8(%0, %3) \n\t"

        /* The byte offset starts at -2*nCoeffs (i.e. at block[0]) and the
           loop repeats while it is <= 0, so index nCoeffs is always covered
           and up to 7 further entries may be processed as well. */
        "add $16, %3 \n\t"
        "jng 1b \n\t"
        /* NOTE(review): operand %3 is listed as an input but is modified by
           the "add" above; GCC's extended-asm rules expect such an operand to
           be declared as an in/out ("+r") operand. */
        ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
        : "memory"
        );
        /* Restore the DC value computed before the loop. */
        block[0]= level;
}
-1 : 0 + + "pandn %%mm2, %%mm0 \n\t" + "pandn %%mm3, %%mm1 \n\t" + + "movq %%mm0, (%0, %3) \n\t" + "movq %%mm1, 8(%0, %3) \n\t" + + "add $16, %3 \n\t" + "jng 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) + : "memory" + ); +} + + +/* + NK: + Note: looking at PARANOID: + "enable all paranoid tests for rounding, overflows, etc..." + +#ifdef PARANOID + if (level < -2048 || level > 2047) + fprintf(stderr, "unquant error %d %d\n", i, level); +#endif + We can suppose that result of two multiplications can't be greate of 0xFFFF + i.e. is 16-bit, so we use here only PMULLW instruction and can avoid + a complex multiplication. +===================================================== + Full formula for multiplication of 2 integer numbers + which are represent as high:low words: + input: value1 = high1:low1 + value2 = high2:low2 + output: value3 = value1*value2 + value3=high3:low3 (on overflow: modulus 2^32 wrap-around) + this mean that for 0x123456 * 0x123456 correct result is 0x766cb0ce4 + but this algorithm will compute only 0x66cb0ce4 + this limited by 16-bit size of operands + --------------------------------- + tlow1 = high1*low2 + tlow2 = high2*low1 + tlow1 = tlow1 + tlow2 + high3:low3 = low1*low2 + high3 += tlow1 +*/ +static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + long nCoeffs; + const uint16_t *quant_matrix; + int block0; + + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; + + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "mov %3, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, 
%%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q + "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "psraw $3, %%mm0 \n\t" + "psraw $3, %%mm1 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" + + "add $16, %%"REG_a" \n\t" + "js 1b \n\t" + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) + : "%"REG_a, "memory" + ); + block[0]= block0; +} + +static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + long nCoeffs; + const uint16_t *quant_matrix; + + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; + + quant_matrix = s->inter_matrix; +asm volatile( + "pcmpeqw %%mm7, %%mm7 \n\t" + "psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "mov %3, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, 
%%"REG_a"), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2 + "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2 + "paddw %%mm7, %%mm0 \n\t" // abs(block[i])*2 + 1 + "paddw %%mm7, %%mm1 \n\t" // abs(block[i])*2 + 1 + "pmullw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q + "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 + "psraw $4, %%mm0 \n\t" + "psraw $4, %%mm1 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm7, %%mm1 \n\t" + "por %%mm7, %%mm0 \n\t" + "por %%mm7, %%mm1 \n\t" + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" + "psubw %%mm3, %%mm1 \n\t" + "pandn %%mm0, %%mm4 \n\t" + "pandn %%mm1, %%mm5 \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" + + "add $16, %%"REG_a" \n\t" + "js 1b \n\t" + ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) + : "%"REG_a, "memory" + ); +} + +static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + long nCoeffs; + const uint16_t *quant_matrix; + int block0; + + assert(s->block_last_index[n]>=0); + + if(s->alternate_scan) nCoeffs= 63; //FIXME + else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + quant_matrix = s->intra_matrix; +asm volatile( + "pcmpeqw 
%%mm7, %%mm7 \n\t" + "psrlw $15, %%mm7 \n\t" + "movd %2, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "mov %3, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" + "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] + "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // abs(block[i]) + "psubw %%mm3, %%mm1 \n\t" // abs(block[i]) + "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*q + "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q + "pxor %%mm4, %%mm4 \n\t" + "pxor %%mm5, %%mm5 \n\t" // FIXME slow + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? 
/*
 * MMX inverse quantisation of one inter block (MPEG-2 rule), in place.
 *
 * For every coefficient the loop touches:
 *     0        stays 0
 *     v != 0   becomes sign(v) * (((2*|v| + 1) * qscale * inter_matrix[i]) >> 4)
 * computed on 16-bit lanes (products wrap, the shift is logical).
 *
 * After the loop, the XOR of all values written is folded down to one parity
 * bit which is used to toggle the least significant bit of the last
 * coefficient, block[63]. This looks like MPEG-2 mismatch control; the intra
 * variant in this file carries a note that it deliberately omits that step.
 *
 * s:      codec context; alternate_scan, block_last_index[],
 *         intra_scantable.raster_end[] and inter_matrix are read.
 *         NOTE(review): the scan table consulted is intra_scantable although
 *         this is the inter path -- presumably both tables agree on
 *         raster_end; confirm.
 * block:  coefficients, modified in place.
 * n:      block number (only used to index block_last_index[]).
 * qscale: quantiser scale.
 *
 * %mm0-%mm7 are overwritten without being declared as clobbered.
 */
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
                                     DCTELEM *block, int n, int qscale)
{
    long nCoeffs;
    const uint16_t *quant_matrix;

    assert(s->block_last_index[n]>=0);

    /* nCoeffs is the highest coefficient index the loop must reach. */
    if(s->alternate_scan) nCoeffs= 63; //FIXME
    else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];

        quant_matrix = s->inter_matrix;
asm volatile(
                /* mm7 = 0x000000000000FFFF: seed of the parity accumulator.
                   Bit 0 of the seed inverts the final parity. */
                "pcmpeqw %%mm7, %%mm7 \n\t"
                "psrlq $48, %%mm7 \n\t"
                /* broadcast qscale to the four words of mm6 */
                "movd %2, %%mm6 \n\t"
                "packssdw %%mm6, %%mm6 \n\t"
                "packssdw %%mm6, %%mm6 \n\t"
                /* index = -2*nCoeffs bytes, i.e. block[0] relative to %0 */
                "mov %3, %%"REG_a" \n\t"
                ".balign 16 \n\t"
                "1: \n\t"
                /* eight coefficients and eight matrix entries per iteration */
                "movq (%0, %%"REG_a"), %%mm0 \n\t"
                "movq 8(%0, %%"REG_a"), %%mm1 \n\t"
                "movq (%1, %%"REG_a"), %%mm4 \n\t"
                "movq 8(%1, %%"REG_a"), %%mm5 \n\t"
                "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
                "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
                "pxor %%mm2, %%mm2 \n\t"
                "pxor %%mm3, %%mm3 \n\t"
                "pcmpgtw %%mm0, %%mm2 \n\t" // block[i] < 0 ? -1 : 0
                "pcmpgtw %%mm1, %%mm3 \n\t" // block[i] < 0 ? -1 : 0
                "pxor %%mm2, %%mm0 \n\t"
                "pxor %%mm3, %%mm1 \n\t"
                "psubw %%mm2, %%mm0 \n\t" // abs(block[i])
                "psubw %%mm3, %%mm1 \n\t" // abs(block[i])
                "paddw %%mm0, %%mm0 \n\t" // abs(block[i])*2
                "paddw %%mm1, %%mm1 \n\t" // abs(block[i])*2
                "pmullw %%mm4, %%mm0 \n\t" // abs(block[i])*2*q
                "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*2*q
                "paddw %%mm4, %%mm0 \n\t" // (abs(block[i])*2 + 1)*q
                "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
                "pxor %%mm4, %%mm4 \n\t"
                "pxor %%mm5, %%mm5 \n\t" // FIXME slow
                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
                "psrlw $4, %%mm0 \n\t"          // logical >> 4 of the magnitude
                "psrlw $4, %%mm1 \n\t"
                /* re-apply the sign saved in mm2/mm3 (xor + subtract) */
                "pxor %%mm2, %%mm0 \n\t"
                "pxor %%mm3, %%mm1 \n\t"
                "psubw %%mm2, %%mm0 \n\t"
                "psubw %%mm3, %%mm1 \n\t"
                /* zero inputs stay zero: result = ~zero_mask & value */
                "pandn %%mm0, %%mm4 \n\t"
                "pandn %%mm1, %%mm5 \n\t"
                /* accumulate the XOR of everything that is written */
                "pxor %%mm4, %%mm7 \n\t"
                "pxor %%mm5, %%mm7 \n\t"
                "movq %%mm4, (%0, %%"REG_a") \n\t"
                "movq %%mm5, 8(%0, %%"REG_a") \n\t"

                /* Repeats while the byte offset is <= 0, so index nCoeffs is
                   always covered and up to 7 further entries may be
                   processed as well. */
                "add $16, %%"REG_a" \n\t"
                "jng 1b \n\t"
                /* %0 + %3 is the start of block; byte offset 124 is
                   block[62], so this 32-bit load fetches block[62] and
                   block[63] (assuming 16-bit DCTELEM, as the 2*nCoeffs byte
                   scaling implies). */
                "movd 124(%0, %3), %%mm0 \n\t"
                /* fold the four 16-bit lanes of mm7 together with XOR; bit 0
                   of the low word is then the inverted parity of the LSBs
                   of all written values */
                "movq %%mm7, %%mm6 \n\t"
                "psrlq $32, %%mm7 \n\t"
                "pxor %%mm6, %%mm7 \n\t"
                "movq %%mm7, %%mm6 \n\t"
                "psrlq $16, %%mm7 \n\t"
                "pxor %%mm6, %%mm7 \n\t"
                /* move that bit to bit 16 of the low dword, i.e. to the LSB
                   of the second word loaded above (block[63]) */
                "pslld $31, %%mm7 \n\t"
                "psrlq $15, %%mm7 \n\t"
                "pxor %%mm7, %%mm0 \n\t"        // toggle LSB of block[63] if required
                "movd %%mm0, 124(%0, %3) \n\t"

        ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
        : "%"REG_a, "memory"
        );
}
%2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (ptr) + : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) + ); + } + else + { + asm volatile( + "1: \n\t" + "movd (%0), %%mm0 \n\t" + "punpcklbw %%mm0, %%mm0 \n\t" + "punpcklwd %%mm0, %%mm0 \n\t" + "punpckldq %%mm0, %%mm0 \n\t" + "movq %%mm0, -8(%0) \n\t" + "movq %%mm0, -16(%0) \n\t" + "movq -8(%0, %2), %%mm1 \n\t" + "punpckhbw %%mm1, %%mm1 \n\t" + "punpckhwd %%mm1, %%mm1 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm1, (%0, %2) \n\t" + "movq %%mm1, 8(%0, %2) \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (ptr) + : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) + ); + } + + for(i=0;imb_intra; + int *sum= s->dct_error_sum[intra]; + uint16_t *offset= s->dct_offset[intra]; + + s->dct_count[intra]++; + + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "1: \n\t" + "pxor %%mm0, %%mm0 \n\t" + "pxor %%mm1, %%mm1 \n\t" + "movq (%0), %%mm2 \n\t" + "movq 8(%0), %%mm3 \n\t" + "pcmpgtw %%mm2, %%mm0 \n\t" + "pcmpgtw %%mm3, %%mm1 \n\t" + "pxor %%mm0, %%mm2 \n\t" + "pxor %%mm1, %%mm3 \n\t" + "psubw %%mm0, %%mm2 \n\t" + "psubw %%mm1, %%mm3 \n\t" + "movq %%mm2, %%mm4 \n\t" + "movq %%mm3, %%mm5 \n\t" + "psubusw (%2), %%mm2 \n\t" + "psubusw 8(%2), %%mm3 \n\t" + "pxor %%mm0, %%mm2 \n\t" + "pxor %%mm1, %%mm3 \n\t" + "psubw %%mm0, %%mm2 \n\t" + "psubw %%mm1, %%mm3 \n\t" + "movq %%mm2, (%0) \n\t" + "movq %%mm3, 8(%0) \n\t" + "movq %%mm4, %%mm2 \n\t" + "movq %%mm5, %%mm3 \n\t" + "punpcklwd %%mm7, %%mm4 \n\t" + "punpckhwd %%mm7, %%mm2 \n\t" + "punpcklwd %%mm7, %%mm5 \n\t" + "punpckhwd %%mm7, %%mm3 \n\t" + "paddd (%1), %%mm4 \n\t" + "paddd 8(%1), %%mm2 \n\t" + "paddd 16(%1), %%mm5 \n\t" + "paddd 24(%1), %%mm3 \n\t" + "movq %%mm4, (%1) \n\t" + "movq %%mm2, 8(%1) \n\t" + "movq %%mm5, 16(%1) \n\t" + "movq %%mm3, 24(%1) \n\t" + "add $16, %0 
\n\t" + "add $32, %1 \n\t" + "add $16, %2 \n\t" + "cmp %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (block), "+r" (sum), "+r" (offset) + : "r"(block+64) + ); +} + +static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ + const int intra= s->mb_intra; + int *sum= s->dct_error_sum[intra]; + uint16_t *offset= s->dct_offset[intra]; + + s->dct_count[intra]++; + + asm volatile( + "pxor %%xmm7, %%xmm7 \n\t" + "1: \n\t" + "pxor %%xmm0, %%xmm0 \n\t" + "pxor %%xmm1, %%xmm1 \n\t" + "movdqa (%0), %%xmm2 \n\t" + "movdqa 16(%0), %%xmm3 \n\t" + "pcmpgtw %%xmm2, %%xmm0 \n\t" + "pcmpgtw %%xmm3, %%xmm1 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pxor %%xmm1, %%xmm3 \n\t" + "psubw %%xmm0, %%xmm2 \n\t" + "psubw %%xmm1, %%xmm3 \n\t" + "movdqa %%xmm2, %%xmm4 \n\t" + "movdqa %%xmm3, %%xmm5 \n\t" + "psubusw (%2), %%xmm2 \n\t" + "psubusw 16(%2), %%xmm3 \n\t" + "pxor %%xmm0, %%xmm2 \n\t" + "pxor %%xmm1, %%xmm3 \n\t" + "psubw %%xmm0, %%xmm2 \n\t" + "psubw %%xmm1, %%xmm3 \n\t" + "movdqa %%xmm2, (%0) \n\t" + "movdqa %%xmm3, 16(%0) \n\t" + "movdqa %%xmm4, %%xmm6 \n\t" + "movdqa %%xmm5, %%xmm0 \n\t" + "punpcklwd %%xmm7, %%xmm4 \n\t" + "punpckhwd %%xmm7, %%xmm6 \n\t" + "punpcklwd %%xmm7, %%xmm5 \n\t" + "punpckhwd %%xmm7, %%xmm0 \n\t" + "paddd (%1), %%xmm4 \n\t" + "paddd 16(%1), %%xmm6 \n\t" + "paddd 32(%1), %%xmm5 \n\t" + "paddd 48(%1), %%xmm0 \n\t" + "movdqa %%xmm4, (%1) \n\t" + "movdqa %%xmm6, 16(%1) \n\t" + "movdqa %%xmm5, 32(%1) \n\t" + "movdqa %%xmm0, 48(%1) \n\t" + "add $32, %0 \n\t" + "add $64, %1 \n\t" + "add $32, %2 \n\t" + "cmp %3, %0 \n\t" + " jb 1b \n\t" + : "+r" (block), "+r" (sum), "+r" (offset) + : "r"(block+64) + ); +} + +#undef HAVE_MMX2 +#define RENAME(a) a ## _MMX +#define RENAMEl(a) a ## _mmx +#include "mpegvideo_mmx_template.c" + +#define HAVE_MMX2 +#undef RENAME +#undef RENAMEl +#define RENAME(a) a ## _MMX2 +#define RENAMEl(a) a ## _mmx2 +#include "mpegvideo_mmx_template.c" + +#undef RENAME +#undef RENAMEl +#define RENAME(a) a ## _SSE2 +#define RENAMEl(a) a ## _sse2 +#include 
"mpegvideo_mmx_template.c" + +void MPV_common_init_mmx(MpegEncContext *s) +{ + if (mm_flags & MM_MMX) { + const int dct_algo = s->avctx->dct_algo; + + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; + s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; + s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; + s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; + + draw_edges = draw_edges_mmx; + + if (mm_flags & MM_SSE2) { + s->denoise_dct= denoise_dct_sse2; + } else { + s->denoise_dct= denoise_dct_mmx; + } + + if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ + if(mm_flags & MM_SSE2){ + s->dct_quantize= dct_quantize_SSE2; + } else if(mm_flags & MM_MMXEXT){ + s->dct_quantize= dct_quantize_MMX2; + } else { + s->dct_quantize= dct_quantize_MMX; + } + } + } +} diff --git a/mpeg4/src/libavcodec/i386/mpegvideo_mmx_template.c b/mpeg4/src/libavcodec/i386/mpegvideo_mmx_template.c new file mode 100644 index 0000000000000000000000000000000000000000..2c50df232df8967a44b41ebd8b166b0782506672 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/mpegvideo_mmx_template.c @@ -0,0 +1,345 @@ +/* + * MPEG video MMX templates + * + * Copyright (c) 2002 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/*
 * Instruction-sequence helpers for the quantiser template.
 *
 * This file is included several times with different RENAME()/HAVE_MMX2
 * settings, so every helper is undefined first and then redefined for the
 * current instruction set. PMAX has to be undefined as well: its MMX and
 * MMX2 bodies differ, and redefining a macro with a different body without
 * an intervening #undef is not valid C.
 *
 * SPREADW(a)  broadcast the low 16-bit word of register a to all four words.
 * PMAXW(a,b)  b = per-word maximum of a and b (pmaxsw with MMX2; the plain
 *             MMX fallback uses saturating subtract + add, which yields the
 *             unsigned maximum).
 * PMAX(a,b)   horizontal maximum of the four words of a, left in the low
 *             word of a; b is used as scratch.
 *
 * Each macro expands to adjacent string literals meant to be pasted into an
 * asm() template.
 */
#undef SPREADW
#undef PMAXW
#undef PMAX
#ifdef HAVE_MMX2
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
#define PMAX(a,b) \
            "pshufw $0x0E," #a ", " #b " \n\t"\
            PMAXW(b, a)\
            "pshufw $0x01," #a ", " #b " \n\t"\
            PMAXW(b, a)
#else
#define SPREADW(a) \
        "punpcklwd " #a ", " #a " \n\t"\
        "punpcklwd " #a ", " #a " \n\t"
#define PMAXW(a,b) \
        "psubusw " #a ", " #b " \n\t"\
        "paddw " #a ", " #b " \n\t"
#define PMAX(a,b) \
            "movq " #a ", " #b " \n\t"\
            "psrlq $32, " #a " \n\t"\
            PMAXW(b, a)\
            "movq " #a ", " #b " \n\t"\
            "psrlq $16, " #a " \n\t"\
            PMAXW(b, a)

#endif
+ + if(s->dct_error_sum) + s->denoise_dct(s, block); + + if (s->mb_intra) { + int dummy; + if (n < 4) + q = s->y_dc_scale; + else + q = s->c_dc_scale; + /* note: block[0] is assumed to be positive */ + if (!s->h263_aic) { +#if 1 + asm volatile ( + "mul %%ecx \n\t" + : "=d" (level), "=a"(dummy) + : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) + ); +#else + asm volatile ( + "xorl %%edx, %%edx \n\t" + "divw %%cx \n\t" + "movzwl %%ax, %%eax \n\t" + : "=a" (level) + : "a" ((block[0]>>2) + q), "c" (q<<1) + : "%edx" + ); +#endif + } else + /* For AIC we skip quant/dequant of INTRADC */ + level = (block[0] + 4)>>3; + + block[0]=0; //avoid fake overflow +// temp_block[0] = (block[0] + (q >> 1)) / q; + last_non_zero_p1 = 1; + bias = s->q_intra_matrix16[qscale][1]; + qmat = s->q_intra_matrix16[qscale][0]; + } else { + last_non_zero_p1 = 0; + bias = s->q_inter_matrix16[qscale][1]; + qmat = s->q_inter_matrix16[qscale][0]; + } + + if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ + + asm volatile( + "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movq (%2), %%mm5 \n\t" // qmat[0] + "pxor %%mm6, %%mm6 \n\t" + "psubw (%3), %%mm6 \n\t" // -bias[0] + "mov $-128, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%"REG_a") \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 
0xFF : 0x00 + "movq (%4, %%"REG_a"), %%mm1 \n\t" + "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + PMAX(%%mm3, %%mm0) + "movd %%mm3, %%"REG_a" \n\t" + "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat), "r" (bias), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + }else{ // FMT_H263 + asm volatile( + "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "mov $-128, %%"REG_a" \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] + "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%"REG_a") \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 
0xFF : 0x00 + "movq (%4, %%"REG_a"), %%mm1 \n\t" + "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "add $8, %%"REG_a" \n\t" + " js 1b \n\t" + PMAX(%%mm3, %%mm0) + "movd %%mm3, %%"REG_a" \n\t" + "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat+64), "r" (bias+64), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + } + + if(s->mb_intra) block[0]= level; + else block[0]= temp_block[0]; + + if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){ + if(last_non_zero_p1 <= 1) goto end; + block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; + block[0x20] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; + block[0x09] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; + block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; + block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; + block[0x0C] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; + block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; + block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; + block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; + block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; + block[0x0D] = 
temp_block[0x07]; block[0x15] = temp_block[0x0E]; + block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; + block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; + block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; + block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; + block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; + block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; + block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; + block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; + block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; + block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){ + if(last_non_zero_p1 <= 1) goto end; + block[0x04] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; + block[0x05] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1C] = temp_block[0x19]; + block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; + block[0x02] = 
temp_block[0x04]; block[0x06] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; + block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; + block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; + block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; + block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; + block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; + block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; + block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; + block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; + block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else{ + if(last_non_zero_p1 <= 1) goto end; + block[0x01] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = 
temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; + block[0x03] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x19] = temp_block[0x19]; + block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; + block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; + block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; + block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; + block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; + block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; + block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; + block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; + block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; + block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = 
temp_block[0x27]; block[0x2E] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; + block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + } + end: +/* + for(i=0; i + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "../dsputil.h" +#include "../simple_idct.h" + +/* +23170.475006 +22725.260826 +21406.727617 +19265.545870 +16384.000000 +12872.826198 +8866.956905 +4520.335430 +*/ +#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#if 0 +#define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#else +#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 +#endif +#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 +#define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + +#define ROW_SHIFT 11 +#define COL_SHIFT 20 // 6 + +static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000ULL; 
+static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL; + +static const int16_t __attribute__((aligned(8))) coeffs[]= { + 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0, +// 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0, +// 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16), + 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0, + // the 1 = ((1<<(COL_SHIFT-1))/C4)<> COL_SHIFT; + col[8*1] = (a1 + b1) >> COL_SHIFT; + col[8*2] = (a2 + b2) >> COL_SHIFT; + col[8*3] = (a3 + b3) >> COL_SHIFT; + col[8*4] = (a3 - b3) >> COL_SHIFT; + col[8*5] = (a2 - b2) >> COL_SHIFT; + col[8*6] = (a1 - b1) >> COL_SHIFT; + col[8*7] = (a0 - b0) >> COL_SHIFT; +} + +/* + * Scalar row pass. Reads eight coefficients from input[] at offsets + * 0,1,4,5,8,9,12,13 (stored as row[0],row[2],row[4],row[6],row[1],row[3], + * row[5],row[7]) and writes eight results to the even offsets output[0..14]. + * If every coefficient except row[0] is zero, all eight outputs are row[0]<<3. + * Otherwise the even part a0..a3 (including the 1<<(ROW_SHIFT-1) rounding term) + * and the odd part b0..b3 are combined: outputs 0..3 are (aK + bK)>>ROW_SHIFT + * and outputs 4..7 are (a3-b3), (a2-b2), (a1-b1), (a0-b0), each >>ROW_SHIFT. + * C0 is declared but not used in this function. + * NOTE(review): row[0]<<3 left-shifts a value that may be negative - confirm + * this is acceptable for the targeted compilers. + */ +static void inline idctRow (int16_t * output, int16_t * input) +{ + int16_t row[8]; + + int a0, a1, a2, a3, b0, b1, b2, b3; + const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5 + const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 + +row[0] = input[0]; +row[2] = input[1]; +row[4] = input[4]; +row[6] = input[5]; +row[1] = input[8]; +row[3] = input[9]; +row[5] = input[12]; +row[7] = input[13]; + + if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) { + row[0] = row[1] = row[2] = row[3] = row[4] = + row[5] = row[6] = row[7] = row[0]<<3; + output[0] = row[0]; + output[2] = row[1]; + output[4] = row[2]; + output[6] = row[3]; + output[8] = row[4]; + output[10] = row[5]; + output[12] = row[6]; + output[14] = row[7]; + return; + } + + a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1)); + a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1)); + a2 = C4*row[0] - C6*row[2] - C4*row[4] + 
C2*row[6] + (1<<(ROW_SHIFT-1)); + a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1)); + + b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; + b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; + b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; + b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; + + row[0] = (a0 + b0) >> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; + + output[0] = row[0]; + output[2] = row[1]; + output[4] = row[2]; + output[6] = row[3]; + output[8] = row[4]; + output[10] = row[5]; + output[12] = row[6]; + output[14] = row[7]; +} +#endif + +static inline void idct(int16_t *block) +{ + int64_t __attribute__((aligned(8))) align_tmp[16]; + int16_t * const temp= (int16_t*)align_tmp; + + asm volatile( +#if 0 //Alternative, simpler variant + +#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 
*/\ + "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm0, %%mm0 \n\t" \ + "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\ + "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\ + "movq %%mm7, " #dst " \n\t"\ + "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "movq %%mm2, 24+" #dst " \n\t"\ + "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\ + "movq %%mm2, 8+" #dst " \n\t"\ + "psrad $" 
#shift ", %%mm4 \n\t"\ + "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\ + "movq %%mm4, 16+" #dst " \n\t"\ + +#define COL_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + #rounder ", %%mm0 \n\t"\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\ + "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\ + "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", 
%%mm2 \n\t"\ + "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm7, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm2, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm2, 32+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm4, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t"\ + + +#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq "MANGLE(wm1010)", %%mm4 \n\t"\ + "pand %%mm0, %%mm4 \n\t"\ + "por %%mm1, %%mm4 \n\t"\ + "por %%mm2, %%mm4 \n\t"\ + "por %%mm3, %%mm4 \n\t"\ + "packssdw 
%%mm4,%%mm4 \n\t"\ + "movd %%mm4, %%eax \n\t"\ + "orl %%eax, %%eax \n\t"\ + "jz 1f \n\t"\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm0, %%mm0 \n\t" \ + "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\ + "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\ + "movq %%mm7, " #dst " \n\t"\ + "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "movq %%mm2, 24+" #dst " \n\t"\ + "pmaddwd %%mm1, %%mm4 
\n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\ + "movq %%mm2, 8+" #dst " \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\ + "movq %%mm4, 16+" #dst " \n\t"\ + "jmp 2f \n\t"\ + "1: \n\t"\ + "pslld $16, %%mm0 \n\t"\ + "#paddd "MANGLE(d40000)", %%mm0 \n\t"\ + "psrad $13, %%mm0 \n\t"\ + "packssdw %%mm0, %%mm0 \n\t"\ + "movq %%mm0, " #dst " \n\t"\ + "movq %%mm0, 8+" #dst " \n\t"\ + "movq %%mm0, 16+" #dst " \n\t"\ + "movq %%mm0, 24+" #dst " \n\t"\ + "2: \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11) +/*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11) +ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11) +ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/ + +DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11) +DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11) +DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11) + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +COL_IDCT( 16(%1), 80(%1), 
48(%1), 112(%1), 8(%0),/nop, 20) +COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + +#else + +#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq "MANGLE(wm1010)", %%mm4 \n\t"\ + "pand %%mm0, %%mm4 \n\t"\ + "por %%mm1, %%mm4 \n\t"\ + "por %%mm2, %%mm4 \n\t"\ + "por %%mm3, %%mm4 \n\t"\ + "packssdw %%mm4,%%mm4 \n\t"\ + "movd %%mm4, %%eax \n\t"\ + "orl %%eax, %%eax \n\t"\ + "jz 1f \n\t"\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm0, %%mm0 \n\t" \ + "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" 
#shift ", %%mm4 \n\t"\ + "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\ + "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\ + "movq %%mm7, " #dst " \n\t"\ + "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "movq %%mm2, 24+" #dst " \n\t"\ + "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\ + "movq %%mm2, 8+" #dst " \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\ + "movq %%mm4, 16+" #dst " \n\t"\ + "jmp 2f \n\t"\ + "1: \n\t"\ + "pslld $16, %%mm0 \n\t"\ + "paddd "MANGLE(d40000)", %%mm0 \n\t"\ + "psrad $13, %%mm0 \n\t"\ + "packssdw %%mm0, %%mm0 \n\t"\ + "movq %%mm0, " #dst " \n\t"\ + "movq %%mm0, 8+" #dst " \n\t"\ + "movq %%mm0, 16+" #dst " \n\t"\ + "movq %%mm0, 24+" #dst " \n\t"\ + "2: \n\t" + +#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 
*/\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq %%mm0, %%mm4 \n\t"\ + "por %%mm1, %%mm4 \n\t"\ + "por %%mm2, %%mm4 \n\t"\ + "por %%mm3, %%mm4 \n\t"\ + "packssdw %%mm4,%%mm4 \n\t"\ + "movd %%mm4, %%eax \n\t"\ + "orl %%eax, %%eax \n\t"\ + "jz " #bt " \n\t"\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm0, %%mm0 \n\t" \ + "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\ + "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\ + "movq 
%%mm7, " #dst " \n\t"\ + "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "movq %%mm2, 24+" #dst " \n\t"\ + "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\ + "movq %%mm2, 8+" #dst " \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\ + "movq %%mm4, 16+" #dst " \n\t"\ + +#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + 
"paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm0, %%mm0 \n\t" \ + "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\ + "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\ + "movq %%mm7, " #dst " \n\t"\ + "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "movq %%mm2, 24+" #dst " \n\t"\ + "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", 
%%mm6 \n\t"\ + "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\ + "movq %%mm2, 8+" #dst " \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\ + "movq %%mm4, 16+" #dst " \n\t"\ + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11) +Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f) +Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f) +Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f) + +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + #rounder ", %%mm0 \n\t"\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\ + "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\ + "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm1 \n\t" 
/* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm7, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm2, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm2, 32+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm4, 64+" #dst " \n\t"\ + "movd 
%%mm5, 80+" #dst " \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + "#.balign 16 \n\t"\ + "4: \n\t" +Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f) +Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f) + +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + #rounder ", %%mm0 \n\t"\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\ + "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\ + "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd 
%%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm1, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm2, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm1 \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm2, 32+" #dst " \n\t"\ + "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm1, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t" + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + "#.balign 16 \n\t"\ + "6: \n\t" +Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f) + +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 
\n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + #rounder ", %%mm0 \n\t"\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm1, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm2, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm1 \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm2, 32+" #dst " \n\t"\ + "packssdw %%mm1, %%mm1 
\n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm1, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + "#.balign 16 \n\t"\ + "2: \n\t" +Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f) + +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + #rounder ", %%mm0 \n\t"\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\ + "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\ + "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\ + "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" 
#shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm7, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm2, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\ + "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\ + "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\ + "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\ + "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\ + "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\ + "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm2, 32+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm4, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t" + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + "#.balign 16 \n\t"\ + "3: \n\t" +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, 
shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + #rounder ", %%mm0 \n\t"\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 64(%2), %%mm3 \n\t"\ + "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\ + "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm1 \n\t"\ + "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm7, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm1, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\ + "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm1 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + 
"packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm1, 32+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "movd %%mm4, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + "#.balign 16 \n\t"\ + "5: \n\t" +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + #rounder ", %%mm0 \n\t"\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\ + "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\ + "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\ + "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\ + "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 
C6r6+C2r2 */\ + "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm1 \n\t"\ + "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\ + "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\ + #rounder ", %%mm2 \n\t"\ + "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\ + "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\ + "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\ + "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\ + "psrad $" #shift ", %%mm4 \n\t"\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm3 \n\t"\ + "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\ + "movq %%mm4, " #dst " \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\ + "movq %%mm0, 16+" #dst " \n\t"\ + "movq %%mm0, 96+" #dst " \n\t"\ + "movq %%mm4, 112+" #dst " \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "psrad $" #shift ", %%mm6 \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm2, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movq %%mm5, 32+" #dst " \n\t"\ + "psrad $" #shift ", %%mm1 \n\t"\ + "packssdw %%mm1, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movq %%mm6, 48+" #dst " \n\t"\ + "movq %%mm6, 64+" #dst " \n\t"\ + "movq %%mm5, 80+" #dst " \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + + "#.balign 16 \n\t"\ + "1: \n\t" +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ + "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\ + "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\ + 
"movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\ + "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\ + #rounder ", %%mm4 \n\t"\ + "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\ + #rounder ", %%mm0 \n\t"\ + "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\ + "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\ + "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\ + "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\ + "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\ + "movq 64(%2), %%mm1 \n\t"\ + "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\ + "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\ + "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "psrad $" #shift ", %%mm7 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\ + "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\ + "psrad $" #shift ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm3 \n\t"\ + "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\ + "movd %%mm7, " #dst " \n\t"\ + "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\ + "movd %%mm0, 16+" #dst " \n\t"\ + "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\ + "movd %%mm3, 96+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\ + "movd %%mm4, 112+" #dst " \n\t"\ + "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\ + "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\ + "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\ + "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\ + "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\ + "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\ + "psrad $" #shift ", %%mm3 \n\t"\ + "psrad $" #shift ", %%mm5 \n\t"\ + "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\ + "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\ + "psrad $" #shift ", %%mm6 \n\t"\ + "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\ + "movd %%mm3, 32+" #dst " \n\t"\ + 
"psrad $" #shift ", %%mm4 \n\t"\ + "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\ + "movd %%mm6, 48+" #dst " \n\t"\ + "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\ + "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\ + "movd %%mm4, 64+" #dst " \n\t"\ + "movd %%mm5, 80+" #dst " \n\t" + + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + "jmp 9f \n\t" + + + "#.balign 16 \n\t" + "7: \n\t" +#undef IDCT +#define IDCT(src0, src4, src1, src5, dst, rounder, shift) \ + "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\ + "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + #rounder ", %%mm4 \n\t"\ + #rounder ", %%mm0 \n\t"\ + "psrad $" #shift ", %%mm4 \n\t"\ + "psrad $" #shift ", %%mm0 \n\t"\ + "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\ + "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\ + "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\ + "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\ + "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\ + "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\ + #rounder ", %%mm1 \n\t"\ + #rounder ", %%mm2 \n\t"\ + "psrad $" #shift ", %%mm1 \n\t"\ + "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\ + "movq %%mm4, " #dst " \n\t"\ + "psrad $" #shift ", %%mm2 \n\t"\ + "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\ + "movq %%mm0, 16+" #dst " \n\t"\ + "movq %%mm0, 96+" #dst " \n\t"\ + "movq %%mm4, 112+" #dst " \n\t"\ + "movq %%mm0, 32+" #dst " \n\t"\ + "movq %%mm4, 48+" #dst " \n\t"\ + "movq %%mm4, 64+" #dst " \n\t"\ + "movq %%mm0, 80+" #dst " \n\t" + +//IDCT( src0, src4, src1, src5, dst, rounder, shift) +IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0),/nop, 20) +//IDCT( 8(%1), 
72(%1), 40(%1), 104(%1), 4(%0),/nop, 20) +IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0),/nop, 20) +//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20) + + +#endif + +/* +Input + 00 40 04 44 20 60 24 64 + 10 30 14 34 50 70 54 74 + 01 41 03 43 21 61 23 63 + 11 31 13 33 51 71 53 73 + 02 42 06 46 22 62 26 66 + 12 32 16 36 52 72 56 76 + 05 45 07 47 25 65 27 67 + 15 35 17 37 55 75 57 77 + +Temp + 00 04 10 14 20 24 30 34 + 40 44 50 54 60 64 70 74 + 01 03 11 13 21 23 31 33 + 41 43 51 53 61 63 71 73 + 02 06 12 16 22 26 32 36 + 42 46 52 56 62 66 72 76 + 05 07 15 17 25 27 35 37 + 45 47 55 57 65 67 75 77 +*/ + +"9: \n\t" + :: "r" (block), "r" (temp), "r" (coeffs) + : "%eax" + ); +} + +void ff_simple_idct_mmx(int16_t *block) /* Public entry point: runs idct() on block; the column pass stores its output through the block pointer, so the coefficients are replaced in place. */ +{ + idct(block); +} + +//FIXME merge add/put into the idct + +void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block) /* Transforms block in place with idct(), then passes block, dest and line_size to put_pixels_clamped_mmx(); presumably that helper stores the clamped samples to dest - it is defined elsewhere, confirm there. */ +{ + idct(block); + put_pixels_clamped_mmx(block, dest, line_size); +} +void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block) /* Transforms block in place with idct(), then passes block, dest and line_size to add_pixels_clamped_mmx(); presumably that helper adds the result onto the pixels already in dest with clamping - it is defined elsewhere, confirm there. */ +{ + idct(block); + add_pixels_clamped_mmx(block, dest, line_size); +} diff --git a/mpeg4/src/libavcodec/i386/snowdsp_mmx.c b/mpeg4/src/libavcodec/i386/snowdsp_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..5f17e359765c9eaffc1b7bef5c94f69683640fa1 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/snowdsp_mmx.c @@ -0,0 +1,917 @@ +/* + * MMX and SSE2 optimized snow DSP utils + * Copyright (c) 2005-2006 Robert Edele + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "../avcodec.h" +#include "../snow.h" +#include "mmx.h" + +void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){ + const int w2= (width+1)>>1; + // SSE2 code wants aligned pointers: temp is placed on a 16-byte boundary inside temp_buf (0xF mask below, assuming 4-byte DWTELEM), hence the 4 spare elements. + DWTELEM temp_buf[(width>>1) + 4]; + DWTELEM * const temp = temp_buf + 4 - (((int)temp_buf & 0xF) >> 2); + const int w_l= (width>>1); + const int w_r= w2 - 1; + int i; + + { // Lift 0 + DWTELEM * const ref = b + w2 - 1; + DWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calculated twice + // (the first time erroneously), we allow the SSE2 code to run an extra pass. + // The savings in code and time are well worth having to store this value and + // calculate b[0] correctly afterwards. + + i = 0; + asm volatile( + "pcmpeqd %%xmm7, %%xmm7 \n\t" + "pslld $31, %%xmm7 \n\t" + "psrld $29, %%xmm7 \n\t" + ::); + for(; i>W_DS); + } + + { // Lift 1 + DWTELEM * const dst = b+w2; + + i = 0; + for(; (((long)&dst[i]) & 0xF) && i> W_BS); + } + + { // Lift 3 + DWTELEM * const src = b+w2; + + i = 0; + for(; (((long)&temp[i]) & 0xF) && i>W_AS); + } + for(; i>1]; + b[i] = b[i>>1]; + } + for (i-=30; i>=0; i-=32){ + asm volatile( + "movdqa (%1), %%xmm0 \n\t" + "movdqa 16(%1), %%xmm2 \n\t" + "movdqa 32(%1), %%xmm4 \n\t" + "movdqa 48(%1), %%xmm6 \n\t" + "movdqa (%1), %%xmm1 \n\t" + "movdqa 16(%1), %%xmm3 \n\t" + "movdqa 32(%1), %%xmm5 \n\t" + "movdqa 48(%1), %%xmm7 \n\t" + "punpckldq (%2), %%xmm0 \n\t" + "punpckldq 16(%2), %%xmm2 \n\t" + "punpckldq 32(%2), %%xmm4 \n\t" + "punpckldq 48(%2), %%xmm6 \n\t" + "movdqa %%xmm0, (%0) \n\t" + "movdqa %%xmm2, 32(%0) \n\t" + "movdqa %%xmm4, 64(%0) \n\t" + "movdqa %%xmm6, 96(%0) \n\t" + "punpckhdq (%2), %%xmm1 \n\t" + "punpckhdq 16(%2), %%xmm3 \n\t" + "punpckhdq 32(%2), %%xmm5 
\n\t" + "punpckhdq 48(%2), %%xmm7 \n\t" + "movdqa %%xmm1, 16(%0) \n\t" + "movdqa %%xmm3, 48(%0) \n\t" + "movdqa %%xmm5, 80(%0) \n\t" + "movdqa %%xmm7, 112(%0) \n\t" + :: "r"(&(b)[i]), "r"(&(b)[i>>1]), "r"(&(temp)[i>>1]) + : "memory" + ); + } + } +} + +void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){ + const int w2= (width+1)>>1; + DWTELEM temp[width >> 1]; + const int w_l= (width>>1); + const int w_r= w2 - 1; + int i; + + { // Lift 0 + DWTELEM * const ref = b + w2 - 1; + + i = 1; + b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); + asm volatile( + "pcmpeqd %%mm7, %%mm7 \n\t" + "pslld $31, %%mm7 \n\t" + "psrld $29, %%mm7 \n\t" + ::); + for(; i> W_BS); + asm volatile( + "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */ + ::); + for(; i>1]; + b[i] = b[i>>1]; + } + for (i-=14; i>=0; i-=16){ + asm volatile( + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" + "movq 16(%1), %%mm4 \n\t" + "movq 24(%1), %%mm6 \n\t" + "movq (%1), %%mm1 \n\t" + "movq 8(%1), %%mm3 \n\t" + "movq 16(%1), %%mm5 \n\t" + "movq 24(%1), %%mm7 \n\t" + "punpckldq (%2), %%mm0 \n\t" + "punpckldq 8(%2), %%mm2 \n\t" + "punpckldq 16(%2), %%mm4 \n\t" + "punpckldq 24(%2), %%mm6 \n\t" + "movq %%mm0, (%0) \n\t" + "movq %%mm2, 16(%0) \n\t" + "movq %%mm4, 32(%0) \n\t" + "movq %%mm6, 48(%0) \n\t" + "punpckhdq (%2), %%mm1 \n\t" + "punpckhdq 8(%2), %%mm3 \n\t" + "punpckhdq 16(%2), %%mm5 \n\t" + "punpckhdq 24(%2), %%mm7 \n\t" + "movq %%mm1, 8(%0) \n\t" + "movq %%mm3, 24(%0) \n\t" + "movq %%mm5, 40(%0) \n\t" + "movq %%mm7, 56(%0) \n\t" + :: "r"(&b[i]), "r"(&b[i>>1]), "r"(&temp[i>>1]) + : "memory" + ); + } + } +} + +#define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\ + ""op" (%%"r",%%"REG_d",4), %%"t0" \n\t"\ + ""op" 16(%%"r",%%"REG_d",4), %%"t1" \n\t"\ + ""op" 32(%%"r",%%"REG_d",4), %%"t2" \n\t"\ + ""op" 48(%%"r",%%"REG_d",4), %%"t3" \n\t" + +#define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3) + +#define 
snow_vertical_compose_sse2_add(r,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_load_add("paddd",r,t0,t1,t2,t3) + +#define snow_vertical_compose_sse2_sub(s0,s1,s2,s3,t0,t1,t2,t3)\ + "psubd %%"s0", %%"t0" \n\t"\ + "psubd %%"s1", %%"t1" \n\t"\ + "psubd %%"s2", %%"t2" \n\t"\ + "psubd %%"s3", %%"t3" \n\t" + +#define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\ + "movdqa %%"s0", (%%"w",%%"REG_d",4) \n\t"\ + "movdqa %%"s1", 16(%%"w",%%"REG_d",4) \n\t"\ + "movdqa %%"s2", 32(%%"w",%%"REG_d",4) \n\t"\ + "movdqa %%"s3", 48(%%"w",%%"REG_d",4) \n\t" + +#define snow_vertical_compose_sse2_sra(n,t0,t1,t2,t3)\ + "psrad $"n", %%"t0" \n\t"\ + "psrad $"n", %%"t1" \n\t"\ + "psrad $"n", %%"t2" \n\t"\ + "psrad $"n", %%"t3" \n\t" + +#define snow_vertical_compose_sse2_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\ + "paddd %%"s0", %%"t0" \n\t"\ + "paddd %%"s1", %%"t1" \n\t"\ + "paddd %%"s2", %%"t2" \n\t"\ + "paddd %%"s3", %%"t3" \n\t" + +#define snow_vertical_compose_sse2_sll(n,t0,t1,t2,t3)\ + "pslld $"n", %%"t0" \n\t"\ + "pslld $"n", %%"t1" \n\t"\ + "pslld $"n", %%"t2" \n\t"\ + "pslld $"n", %%"t3" \n\t" + +#define snow_vertical_compose_sse2_move(s0,s1,s2,s3,t0,t1,t2,t3)\ + "movdqa %%"s0", %%"t0" \n\t"\ + "movdqa %%"s1", %%"t1" \n\t"\ + "movdqa %%"s2", %%"t2" \n\t"\ + "movdqa %%"s3", %%"t3" \n\t" + +void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ + long i = width; + + while(i & 0xF) + { + i--; + b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } + + asm volatile ( + "jmp 2f \n\t" + "1: \n\t" + + "mov %6, %%"REG_a" \n\t" + "mov %4, %%"REG_b" \n\t" + + snow_vertical_compose_sse2_load(REG_b,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7") + 
snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\ + snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") + + "pcmpeqd %%xmm1, %%xmm1 \n\t" + "pslld $31, %%xmm1 \n\t" + "psrld $29, %%xmm1 \n\t" + "mov %5, %%"REG_a" \n\t" + + snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_sra("3","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_load(REG_a,"xmm1","xmm3","xmm5","xmm7") + snow_vertical_compose_sse2_sub("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7") + snow_vertical_compose_sse2_store(REG_a,"xmm1","xmm3","xmm5","xmm7") + "mov %3, %%"REG_c" \n\t" + snow_vertical_compose_sse2_load(REG_b,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_add(REG_c,"xmm1","xmm3","xmm5","xmm7") + snow_vertical_compose_sse2_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_store(REG_b,"xmm0","xmm2","xmm4","xmm6") + "mov %2, %%"REG_a" \n\t" + snow_vertical_compose_sse2_load(REG_c,"xmm1","xmm3","xmm5","xmm7") + snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_sll("2","xmm1","xmm3","xmm5","xmm7")\ + snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") + + "pcmpeqd %%xmm1, %%xmm1 \n\t" + "pslld $31, %%xmm1 \n\t" + "psrld $28, %%xmm1 \n\t" + "mov %1, %%"REG_b" \n\t" + + snow_vertical_compose_sse2_r2r_add("xmm1","xmm1","xmm1","xmm1","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_sra("4","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_add(REG_c,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_store(REG_c,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_add(REG_b,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7") + snow_vertical_compose_sse2_sll("1","xmm0","xmm2","xmm4","xmm6")\ + 
snow_vertical_compose_sse2_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_sra("1","xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_add(REG_a,"xmm0","xmm2","xmm4","xmm6") + snow_vertical_compose_sse2_store(REG_a,"xmm0","xmm2","xmm4","xmm6") + + "2: \n\t" + "sub $16, %%"REG_d" \n\t" + "jge 1b \n\t" + :"+d"(i) + : + "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5): + "%"REG_a"","%"REG_b"","%"REG_c""); +} + +#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\ + ""op" (%%"r",%%"REG_d",4), %%"t0" \n\t"\ + ""op" 8(%%"r",%%"REG_d",4), %%"t1" \n\t"\ + ""op" 16(%%"r",%%"REG_d",4), %%"t2" \n\t"\ + ""op" 24(%%"r",%%"REG_d",4), %%"t3" \n\t" + +#define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\ + snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_add(r,t0,t1,t2,t3)\ + snow_vertical_compose_mmx_load_add("paddd",r,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_sub(s0,s1,s2,s3,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_sub(s0,s1,s2,s3,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\ + "movq %%"s0", (%%"w",%%"REG_d",4) \n\t"\ + "movq %%"s1", 8(%%"w",%%"REG_d",4) \n\t"\ + "movq %%"s2", 16(%%"w",%%"REG_d",4) \n\t"\ + "movq %%"s3", 24(%%"w",%%"REG_d",4) \n\t" + +#define snow_vertical_compose_mmx_sra(n,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_sra(n,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_sll(n,t0,t1,t2,t3)\ + snow_vertical_compose_sse2_sll(n,t0,t1,t2,t3) + +#define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\ + "movq %%"s0", %%"t0" \n\t"\ + "movq %%"s1", %%"t1" \n\t"\ + "movq %%"s2", %%"t2" \n\t"\ + "movq %%"s3", %%"t3" \n\t" + +void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ + long i = width; + while(i & 0x7) + { + 
i--; + b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } + + asm volatile( + "jmp 2f \n\t" + "1: \n\t" + + "mov %6, %%"REG_a" \n\t" + "mov %4, %%"REG_b" \n\t" + + snow_vertical_compose_mmx_load(REG_b,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") + + "pcmpeqd %%mm1, %%mm1 \n\t" + "pslld $31, %%mm1 \n\t" + "psrld $29, %%mm1 \n\t" + "mov %5, %%"REG_a" \n\t" + + snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_sra("3","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_load(REG_a,"mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_sub("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_store(REG_a,"mm1","mm3","mm5","mm7") + "mov %3, %%"REG_c" \n\t" + snow_vertical_compose_mmx_load(REG_b,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_add(REG_c,"mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_store(REG_b,"mm0","mm2","mm4","mm6") + "mov %2, %%"REG_a" \n\t" + snow_vertical_compose_mmx_load(REG_c,"mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_sll("2","mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") + + "pcmpeqd %%mm1, %%mm1 \n\t" + "pslld $31, %%mm1 \n\t" + "psrld $28, %%mm1 \n\t" + "mov %1, %%"REG_b" \n\t" + + snow_vertical_compose_mmx_r2r_add("mm1","mm1","mm1","mm1","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_sra("4","mm0","mm2","mm4","mm6") + 
snow_vertical_compose_mmx_add(REG_c,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_store(REG_c,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_add(REG_b,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7") + snow_vertical_compose_mmx_sll("1","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_sra("1","mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_add(REG_a,"mm0","mm2","mm4","mm6") + snow_vertical_compose_mmx_store(REG_a,"mm0","mm2","mm4","mm6") + + "2: \n\t" + "sub $8, %%"REG_d" \n\t" + "jge 1b \n\t" + :"+d"(i) + : + "m"(b0),"m"(b1),"m"(b2),"m"(b3),"m"(b4),"m"(b5): + "%"REG_a"","%"REG_b"","%"REG_c""); +} + +#define snow_inner_add_yblock_sse2_header \ + DWTELEM * * dst_array = sb->line + src_y;\ + asm volatile(\ + "mov %6, %%"REG_c" \n\t"\ + "mov %5, %%"REG_b" \n\t"\ + "mov %3, %%"REG_S" \n\t"\ + "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\ + "pcmpeqd %%xmm3, %%xmm3 \n\t"\ + "pslld $31, %%xmm3 \n\t"\ + "psrld $24, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\ + "1: \n\t"\ + "mov %1, %%"REG_D" \n\t"\ + "mov (%%"REG_D"), %%"REG_D" \n\t"\ + "add %2, %%"REG_D" \n\t" + +#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\ + "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ + "movq (%%"REG_d"), %%"out_reg1" \n\t"\ + "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\ + "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ + "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ + "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ + "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\ + "punpcklbw %%xmm7, %%xmm0 \n\t"\ + "punpcklbw %%xmm7, %%xmm4 \n\t"\ + "pmullw %%xmm0, %%"out_reg1" \n\t"\ + "pmullw %%xmm4, %%"out_reg2" \n\t" + +#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\ + "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ + "movq (%%"REG_d"), %%"out_reg1" \n\t"\ + "movq 8(%%"REG_d"), 
%%"out_reg2" \n\t"\ + "punpcklbw %%xmm7, %%"out_reg1" \n\t"\ + "punpcklbw %%xmm7, %%"out_reg2" \n\t"\ + "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\ + "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\ + "punpcklbw %%xmm7, %%xmm0 \n\t"\ + "punpcklbw %%xmm7, %%xmm4 \n\t"\ + "pmullw %%xmm0, %%"out_reg1" \n\t"\ + "pmullw %%xmm4, %%"out_reg2" \n\t" + +#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \ + snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\ + "paddusw %%xmm2, %%xmm1 \n\t"\ + "paddusw %%xmm6, %%xmm5 \n\t" + +#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \ + snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\ + "paddusw %%xmm2, %%xmm1 \n\t"\ + "paddusw %%xmm6, %%xmm5 \n\t" + +#define snow_inner_add_yblock_sse2_end_common1\ + "add $32, %%"REG_S" \n\t"\ + "add %%"REG_c", %0 \n\t"\ + "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ + "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ + "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ + "add %%"REG_c", (%%"REG_a") \n\t" + +#define snow_inner_add_yblock_sse2_end_common2\ + "jnz 1b \n\t"\ + :"+m"(dst8),"+m"(dst_array)\ + :\ + "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ + "%"REG_b"","%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); + +#define snow_inner_add_yblock_sse2_end_8\ + "sal $1, %%"REG_c" \n\t"\ + "add $"PTR_SIZE"*2, %1 \n\t"\ + snow_inner_add_yblock_sse2_end_common1\ + "sar $1, %%"REG_c" \n\t"\ + "sub $2, %%"REG_b" \n\t"\ + snow_inner_add_yblock_sse2_end_common2 + +#define snow_inner_add_yblock_sse2_end_16\ + "add $"PTR_SIZE"*1, %1 \n\t"\ + snow_inner_add_yblock_sse2_end_common1\ + "dec %%"REG_b" \n\t"\ + snow_inner_add_yblock_sse2_end_common2 + +static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, + int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ +snow_inner_add_yblock_sse2_header 
+snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0") +snow_inner_add_yblock_sse2_accum_8("2", "8") +snow_inner_add_yblock_sse2_accum_8("1", "128") +snow_inner_add_yblock_sse2_accum_8("0", "136") + + "mov %0, %%"REG_d" \n\t" + "movdqa (%%"REG_D"), %%xmm0 \n\t" + "movdqa %%xmm1, %%xmm2 \n\t" + + "punpckhwd %%xmm7, %%xmm1 \n\t" + "punpcklwd %%xmm7, %%xmm2 \n\t" + "paddd %%xmm2, %%xmm0 \n\t" + "movdqa 16(%%"REG_D"), %%xmm2 \n\t" + "paddd %%xmm1, %%xmm2 \n\t" + "paddd %%xmm3, %%xmm0 \n\t" + "paddd %%xmm3, %%xmm2 \n\t" + + "mov %1, %%"REG_D" \n\t" + "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t" + "add %2, %%"REG_D" \n\t" + + "movdqa (%%"REG_D"), %%xmm4 \n\t" + "movdqa %%xmm5, %%xmm6 \n\t" + "punpckhwd %%xmm7, %%xmm5 \n\t" + "punpcklwd %%xmm7, %%xmm6 \n\t" + "paddd %%xmm6, %%xmm4 \n\t" + "movdqa 16(%%"REG_D"), %%xmm6 \n\t" + "paddd %%xmm5, %%xmm6 \n\t" + "paddd %%xmm3, %%xmm4 \n\t" + "paddd %%xmm3, %%xmm6 \n\t" + + "psrad $8, %%xmm0 \n\t" /* FRAC_BITS. */ + "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */ + "packssdw %%xmm2, %%xmm0 \n\t" + "packuswb %%xmm7, %%xmm0 \n\t" + "movq %%xmm0, (%%"REG_d") \n\t" + + "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */ + "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. 
*/ + "packssdw %%xmm6, %%xmm4 \n\t" + "packuswb %%xmm7, %%xmm4 \n\t" + "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t" +snow_inner_add_yblock_sse2_end_8 +} + +static void inner_add_yblock_bw_16_obmc_32_sse2(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, + int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ +snow_inner_add_yblock_sse2_header +snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0") +snow_inner_add_yblock_sse2_accum_16("2", "16") +snow_inner_add_yblock_sse2_accum_16("1", "512") +snow_inner_add_yblock_sse2_accum_16("0", "528") + + "mov %0, %%"REG_d" \n\t" + "movdqa %%xmm1, %%xmm0 \n\t" + "movdqa %%xmm5, %%xmm4 \n\t" + "punpcklwd %%xmm7, %%xmm0 \n\t" + "paddd (%%"REG_D"), %%xmm0 \n\t" + "punpckhwd %%xmm7, %%xmm1 \n\t" + "paddd 16(%%"REG_D"), %%xmm1 \n\t" + "punpcklwd %%xmm7, %%xmm4 \n\t" + "paddd 32(%%"REG_D"), %%xmm4 \n\t" + "punpckhwd %%xmm7, %%xmm5 \n\t" + "paddd 48(%%"REG_D"), %%xmm5 \n\t" + "paddd %%xmm3, %%xmm0 \n\t" + "paddd %%xmm3, %%xmm1 \n\t" + "paddd %%xmm3, %%xmm4 \n\t" + "paddd %%xmm3, %%xmm5 \n\t" + "psrad $8, %%xmm0 \n\t" /* FRAC_BITS. */ + "psrad $8, %%xmm1 \n\t" /* FRAC_BITS. */ + "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */ + "psrad $8, %%xmm5 \n\t" /* FRAC_BITS. 
*/ + + "packssdw %%xmm1, %%xmm0 \n\t" + "packssdw %%xmm5, %%xmm4 \n\t" + "packuswb %%xmm4, %%xmm0 \n\t" + + "movdqu %%xmm0, (%%"REG_d") \n\t" + +snow_inner_add_yblock_sse2_end_16 +} + +#define snow_inner_add_yblock_mmx_header \ + DWTELEM * * dst_array = sb->line + src_y;\ + asm volatile(\ + "mov %6, %%"REG_c" \n\t"\ + "mov %5, %%"REG_b" \n\t"\ + "mov %3, %%"REG_S" \n\t"\ + "pxor %%mm7, %%mm7 \n\t" /* 0 */\ + "pcmpeqd %%mm3, %%mm3 \n\t"\ + "pslld $31, %%mm3 \n\t"\ + "psrld $24, %%mm3 \n\t" /* FRAC_BITS >> 1 */\ + "1: \n\t"\ + "mov %1, %%"REG_D" \n\t"\ + "mov (%%"REG_D"), %%"REG_D" \n\t"\ + "add %2, %%"REG_D" \n\t" + +#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\ + "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\ + "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\ + "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\ + "punpcklbw %%mm7, %%"out_reg1" \n\t"\ + "punpcklbw %%mm7, %%"out_reg2" \n\t"\ + "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\ + "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + "pmullw %%mm0, %%"out_reg1" \n\t"\ + "pmullw %%mm4, %%"out_reg2" \n\t" + +#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \ + snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\ + "paddusw %%mm2, %%mm1 \n\t"\ + "paddusw %%mm6, %%mm5 \n\t" + +#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\ + "mov %0, %%"REG_d" \n\t"\ + "movq %%mm1, %%mm0 \n\t"\ + "movq %%mm5, %%mm4 \n\t"\ + "punpcklwd %%mm7, %%mm0 \n\t"\ + "paddd "read_offset"(%%"REG_D"), %%mm0 \n\t"\ + "punpckhwd %%mm7, %%mm1 \n\t"\ + "paddd "read_offset"+8(%%"REG_D"), %%mm1 \n\t"\ + "punpcklwd %%mm7, %%mm4 \n\t"\ + "paddd "read_offset"+16(%%"REG_D"), %%mm4 \n\t"\ + "punpckhwd %%mm7, %%mm5 \n\t"\ + "paddd "read_offset"+24(%%"REG_D"), %%mm5 \n\t"\ + "paddd %%mm3, %%mm0 \n\t"\ + "paddd %%mm3, %%mm1 \n\t"\ + "paddd %%mm3, %%mm4 \n\t"\ + "paddd %%mm3, 
%%mm5 \n\t"\ + "psrad $8, %%mm0 \n\t"\ + "psrad $8, %%mm1 \n\t"\ + "psrad $8, %%mm4 \n\t"\ + "psrad $8, %%mm5 \n\t"\ +\ + "packssdw %%mm1, %%mm0 \n\t"\ + "packssdw %%mm5, %%mm4 \n\t"\ + "packuswb %%mm4, %%mm0 \n\t"\ + "movq %%mm0, "write_offset"(%%"REG_d") \n\t" + +#define snow_inner_add_yblock_mmx_end(s_step)\ + "add $"s_step", %%"REG_S" \n\t"\ + "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\ + "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\ + "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\ + "add %%"REG_c", (%%"REG_a") \n\t"\ + "add $"PTR_SIZE"*1, %1 \n\t"\ + "add %%"REG_c", %0 \n\t"\ + "dec %%"REG_b" \n\t"\ + "jnz 1b \n\t"\ + :"+m"(dst8),"+m"(dst_array)\ + :\ + "rm"((long)(src_x<<2)),"m"(obmc),"a"(block),"m"((long)b_h),"m"((long)src_stride):\ + "%"REG_b"","%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d""); + +static void inner_add_yblock_bw_8_obmc_16_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, + int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ +snow_inner_add_yblock_mmx_header +snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") +snow_inner_add_yblock_mmx_accum("2", "8", "0") +snow_inner_add_yblock_mmx_accum("1", "128", "0") +snow_inner_add_yblock_mmx_accum("0", "136", "0") +snow_inner_add_yblock_mmx_mix("0", "0") +snow_inner_add_yblock_mmx_end("16") +} + +static void inner_add_yblock_bw_16_obmc_32_mmx(uint8_t *obmc, const long obmc_stride, uint8_t * * block, int b_w, long b_h, + int src_x, int src_y, long src_stride, slice_buffer * sb, int add, uint8_t * dst8){ +snow_inner_add_yblock_mmx_header +snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0") +snow_inner_add_yblock_mmx_accum("2", "16", "0") +snow_inner_add_yblock_mmx_accum("1", "512", "0") +snow_inner_add_yblock_mmx_accum("0", "528", "0") +snow_inner_add_yblock_mmx_mix("0", "0") + +snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8") +snow_inner_add_yblock_mmx_accum("2", "24", "8") +snow_inner_add_yblock_mmx_accum("1", 
"520", "8") +snow_inner_add_yblock_mmx_accum("0", "536", "8") +snow_inner_add_yblock_mmx_mix("32", "8") +snow_inner_add_yblock_mmx_end("32") +} + +void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ + + if (b_w == 16) + inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + else if (b_w == 8 && obmc_stride == 16) { + if (!(b_h & 1)) + inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + else + inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + } else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); +} + +void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ + if (b_w == 16) + inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + else if (b_w == 8 && obmc_stride == 16) + inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); + else + ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); +} diff --git a/mpeg4/src/libavcodec/i386/vp3dsp_mmx.c b/mpeg4/src/libavcodec/i386/vp3dsp_mmx.c new file mode 100644 index 0000000000000000000000000000000000000000..0684531ae45baeca3e2b8830671429ecc82567b8 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/vp3dsp_mmx.c @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2004 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software 
Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file vp3dsp_mmx.c + * MMX-optimized functions cribbed from the original VP3 source code. + */ + +#include "../dsputil.h" +#include "mmx.h" + +#define IdctAdjustBeforeShift 8 + +/* (12 * 4) 2-byte memory locations ( = 96 bytes total) + * idct_constants[0..15] = Mask table (M(I)) + * idct_constants[16..43] = Cosine table (C(I)) + * idct_constants[44..47] = 8 + */ +static uint16_t idct_constants[(4 + 7 + 1) * 4]; +static const uint16_t idct_cosine_table[7] = { + 64277, 60547, 54491, 46341, 36410, 25080, 12785 +}; + +#define r0 mm0 +#define r1 mm1 +#define r2 mm2 +#define r3 mm3 +#define r4 mm4 +#define r5 mm5 +#define r6 mm6 +#define r7 mm7 + +/* from original comments: The Macro does IDct on 4 1-D Dcts */ +#define BeginIDCT() { \ + movq_m2r(*I(3), r2); \ + movq_m2r(*C(3), r6); \ + movq_r2r(r2, r4); \ + movq_m2r(*J(5), r7); \ + pmulhw_r2r(r6, r4); /* r4 = c3*i3 - i3 */ \ + movq_m2r(*C(5), r1); \ + pmulhw_r2r(r7, r6); /* r6 = c3*i5 - i5 */ \ + movq_r2r(r1, r5); \ + pmulhw_r2r(r2, r1); /* r1 = c5*i3 - i3 */ \ + movq_m2r(*I(1), r3); \ + pmulhw_r2r(r7, r5); /* r5 = c5*i5 - i5 */ \ + movq_m2r(*C(1), r0); /* (all registers are in use) */ \ + paddw_r2r(r2, r4); /* r4 = c3*i3 */ \ + paddw_r2r(r7, r6); /* r6 = c3*i5 */ \ + paddw_r2r(r1, r2); /* r2 = c5*i3 */ \ + movq_m2r(*J(7), r1); \ + paddw_r2r(r5, r7); /* r7 = c5*i5 */ \ + movq_r2r(r0, r5); /* r5 = c1 */ \ + pmulhw_r2r(r3, r0); /* r0 = c1*i1 - 
i1 */ \ + paddsw_r2r(r7, r4); /* r4 = C = c3*i3 + c5*i5 */ \ + pmulhw_r2r(r1, r5); /* r5 = c1*i7 - i7 */ \ + movq_m2r(*C(7), r7); \ + psubsw_r2r(r2, r6); /* r6 = D = c3*i5 - c5*i3 */ \ + paddw_r2r(r3, r0); /* r0 = c1*i1 */ \ + pmulhw_r2r(r7, r3); /* r3 = c7*i1 */ \ + movq_m2r(*I(2), r2); \ + pmulhw_r2r(r1, r7); /* r7 = c7*i7 */ \ + paddw_r2r(r1, r5); /* r5 = c1*i7 */ \ + movq_r2r(r2, r1); /* r1 = i2 */ \ + pmulhw_m2r(*C(2), r2); /* r2 = c2*i2 - i2 */ \ + psubsw_r2r(r5, r3); /* r3 = B = c7*i1 - c1*i7 */ \ + movq_m2r(*J(6), r5); \ + paddsw_r2r(r7, r0); /* r0 = A = c1*i1 + c7*i7 */ \ + movq_r2r(r5, r7); /* r7 = i6 */ \ + psubsw_r2r(r4, r0); /* r0 = A - C */ \ + pmulhw_m2r(*C(2), r5); /* r5 = c2*i6 - i6 */ \ + paddw_r2r(r1, r2); /* r2 = c2*i2 */ \ + pmulhw_m2r(*C(6), r1); /* r1 = c6*i2 */ \ + paddsw_r2r(r4, r4); /* r4 = C + C */ \ + paddsw_r2r(r0, r4); /* r4 = C. = A + C */ \ + psubsw_r2r(r6, r3); /* r3 = B - D */ \ + paddw_r2r(r7, r5); /* r5 = c2*i6 */ \ + paddsw_r2r(r6, r6); /* r6 = D + D */ \ + pmulhw_m2r(*C(6), r7); /* r7 = c6*i6 */ \ + paddsw_r2r(r3, r6); /* r6 = D. = B + D */ \ + movq_r2m(r4, *I(1)); /* save C. at I(1) */ \ + psubsw_r2r(r5, r1); /* r1 = H = c6*i2 - c2*i6 */ \ + movq_m2r(*C(4), r4); \ + movq_r2r(r3, r5); /* r5 = B - D */ \ + pmulhw_r2r(r4, r3); /* r3 = (c4 - 1) * (B - D) */ \ + paddsw_r2r(r2, r7); /* r7 = G = c6*i6 + c2*i2 */ \ + movq_r2m(r6, *I(2)); /* save D. at I(2) */ \ + movq_r2r(r0, r2); /* r2 = A - C */ \ + movq_m2r(*I(0), r6); \ + pmulhw_r2r(r4, r0); /* r0 = (c4 - 1) * (A - C) */ \ + paddw_r2r(r3, r5); /* r5 = B. = c4 * (B - D) */ \ + movq_m2r(*J(4), r3); \ + psubsw_r2r(r1, r5); /* r5 = B.. = B. - H */ \ + paddw_r2r(r0, r2); /* r2 = A. = c4 * (A - C) */ \ + psubsw_r2r(r3, r6); /* r6 = i0 - i4 */ \ + movq_r2r(r6, r0); \ + pmulhw_r2r(r4, r6); /* r6 = (c4 - 1) * (i0 - i4) */ \ + paddsw_r2r(r3, r3); /* r3 = i4 + i4 */ \ + paddsw_r2r(r1, r1); /* r1 = H + H */ \ + paddsw_r2r(r0, r3); /* r3 = i0 + i4 */ \ + paddsw_r2r(r5, r1); /* r1 = H. 
= B + H */ \ + pmulhw_r2r(r3, r4); /* r4 = (c4 - 1) * (i0 + i4) */ \ + paddsw_r2r(r0, r6); /* r6 = F = c4 * (i0 - i4) */ \ + psubsw_r2r(r2, r6); /* r6 = F. = F - A. */ \ + paddsw_r2r(r2, r2); /* r2 = A. + A. */ \ + movq_m2r(*I(1), r0); /* r0 = C. */ \ + paddsw_r2r(r6, r2); /* r2 = A.. = F + A. */ \ + paddw_r2r(r3, r4); /* r4 = E = c4 * (i0 + i4) */ \ + psubsw_r2r(r1, r2); /* r2 = R2 = A.. - H. */ \ +} + +/* RowIDCT gets ready to transpose */ +#define RowIDCT() { \ + \ + BeginIDCT(); \ + \ + movq_m2r(*I(2), r3); /* r3 = D. */ \ + psubsw_r2r(r7, r4); /* r4 = E. = E - G */ \ + paddsw_r2r(r1, r1); /* r1 = H. + H. */ \ + paddsw_r2r(r7, r7); /* r7 = G + G */ \ + paddsw_r2r(r2, r1); /* r1 = R1 = A.. + H. */ \ + paddsw_r2r(r4, r7); /* r7 = G. = E + G */ \ + psubsw_r2r(r3, r4); /* r4 = R4 = E. - D. */ \ + paddsw_r2r(r3, r3); \ + psubsw_r2r(r5, r6); /* r6 = R6 = F. - B.. */ \ + paddsw_r2r(r5, r5); \ + paddsw_r2r(r4, r3); /* r3 = R3 = E. + D. */ \ + paddsw_r2r(r6, r5); /* r5 = R5 = F. + B.. */ \ + psubsw_r2r(r0, r7); /* r7 = R7 = G. - C. */ \ + paddsw_r2r(r0, r0); \ + movq_r2m(r1, *I(1)); /* save R1 */ \ + paddsw_r2r(r7, r0); /* r0 = R0 = G. + C. */ \ +} + +/* Column IDCT normalizes and stores final results */ +#define ColumnIDCT() { \ + \ + BeginIDCT(); \ + \ + paddsw_m2r(*Eight, r2); /* adjust R2 (and R1) for shift */ \ + paddsw_r2r(r1, r1); /* r1 = H. + H. */ \ + paddsw_r2r(r2, r1); /* r1 = R1 = A.. + H. */ \ + psraw_i2r(4, r2); /* r2 = NR2 */ \ + psubsw_r2r(r7, r4); /* r4 = E. = E - G */ \ + psraw_i2r(4, r1); /* r1 = NR1 */ \ + movq_m2r(*I(2), r3); /* r3 = D. */ \ + paddsw_r2r(r7, r7); /* r7 = G + G */ \ + movq_r2m(r2, *I(2)); /* store NR2 at I2 */ \ + paddsw_r2r(r4, r7); /* r7 = G. = E + G */ \ + movq_r2m(r1, *I(1)); /* store NR1 at I1 */ \ + psubsw_r2r(r3, r4); /* r4 = R4 = E. - D. */ \ + paddsw_m2r(*Eight, r4); /* adjust R4 (and R3) for shift */ \ + paddsw_r2r(r3, r3); /* r3 = D. + D. */ \ + paddsw_r2r(r4, r3); /* r3 = R3 = E. + D. 
*/ \ + psraw_i2r(4, r4); /* r4 = NR4 */ \ + psubsw_r2r(r5, r6); /* r6 = R6 = F. - B.. */ \ + psraw_i2r(4, r3); /* r3 = NR3 */ \ + paddsw_m2r(*Eight, r6); /* adjust R6 (and R5) for shift */ \ + paddsw_r2r(r5, r5); /* r5 = B.. + B.. */ \ + paddsw_r2r(r6, r5); /* r5 = R5 = F. + B.. */ \ + psraw_i2r(4, r6); /* r6 = NR6 */ \ + movq_r2m(r4, *J(4)); /* store NR4 at J4 */ \ + psraw_i2r(4, r5); /* r5 = NR5 */ \ + movq_r2m(r3, *I(3)); /* store NR3 at I3 */ \ + psubsw_r2r(r0, r7); /* r7 = R7 = G. - C. */ \ + paddsw_m2r(*Eight, r7); /* adjust R7 (and R0) for shift */ \ + paddsw_r2r(r0, r0); /* r0 = C. + C. */ \ + paddsw_r2r(r7, r0); /* r0 = R0 = G. + C. */ \ + psraw_i2r(4, r7); /* r7 = NR7 */ \ + movq_r2m(r6, *J(6)); /* store NR6 at J6 */ \ + psraw_i2r(4, r0); /* r0 = NR0 */ \ + movq_r2m(r5, *J(5)); /* store NR5 at J5 */ \ + movq_r2m(r7, *J(7)); /* store NR7 at J7 */ \ + movq_r2m(r0, *I(0)); /* store NR0 at I0 */ \ +} + +/* Following macro does two 4x4 transposes in place. + + At entry (we assume): + + r0 = a3 a2 a1 a0 + I(1) = b3 b2 b1 b0 + r2 = c3 c2 c1 c0 + r3 = d3 d2 d1 d0 + + r4 = e3 e2 e1 e0 + r5 = f3 f2 f1 f0 + r6 = g3 g2 g1 g0 + r7 = h3 h2 h1 h0 + + At exit, we have: + + I(0) = d0 c0 b0 a0 + I(1) = d1 c1 b1 a1 + I(2) = d2 c2 b2 a2 + I(3) = d3 c3 b3 a3 + + J(4) = h0 g0 f0 e0 + J(5) = h1 g1 f1 e1 + J(6) = h2 g2 f2 e2 + J(7) = h3 g3 f3 e3 + + I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3. + J(4) J(5) J(6) J(7) is the transpose of r4 r5 r6 r7. + + Since r1 is free at entry, we calculate the Js first. 
*/ + +#define Transpose() { \ + movq_r2r(r4, r1); /* r1 = e3 e2 e1 e0 */ \ + punpcklwd_r2r(r5, r4); /* r4 = f1 e1 f0 e0 */ \ + movq_r2m(r0, *I(0)); /* save a3 a2 a1 a0 */ \ + punpckhwd_r2r(r5, r1); /* r1 = f3 e3 f2 e2 */ \ + movq_r2r(r6, r0); /* r0 = g3 g2 g1 g0 */ \ + punpcklwd_r2r(r7, r6); /* r6 = h1 g1 h0 g0 */ \ + movq_r2r(r4, r5); /* r5 = f1 e1 f0 e0 */ \ + punpckldq_r2r(r6, r4); /* r4 = h0 g0 f0 e0 = R4 */ \ + punpckhdq_r2r(r6, r5); /* r5 = h1 g1 f1 e1 = R5 */ \ + movq_r2r(r1, r6); /* r6 = f3 e3 f2 e2 */ \ + movq_r2m(r4, *J(4)); \ + punpckhwd_r2r(r7, r0); /* r0 = h3 g3 h2 g2 */ \ + movq_r2m(r5, *J(5)); \ + punpckhdq_r2r(r0, r6); /* r6 = h3 g3 f3 e3 = R7 */ \ + movq_m2r(*I(0), r4); /* r4 = a3 a2 a1 a0 */ \ + punpckldq_r2r(r0, r1); /* r1 = h2 g2 f2 e2 = R6 */ \ + movq_m2r(*I(1), r5); /* r5 = b3 b2 b1 b0 */ \ + movq_r2r(r4, r0); /* r0 = a3 a2 a1 a0 */ \ + movq_r2m(r6, *J(7)); \ + punpcklwd_r2r(r5, r0); /* r0 = b1 a1 b0 a0 */ \ + movq_r2m(r1, *J(6)); \ + punpckhwd_r2r(r5, r4); /* r4 = b3 a3 b2 a2 */ \ + movq_r2r(r2, r5); /* r5 = c3 c2 c1 c0 */ \ + punpcklwd_r2r(r3, r2); /* r2 = d1 c1 d0 c0 */ \ + movq_r2r(r0, r1); /* r1 = b1 a1 b0 a0 */ \ + punpckldq_r2r(r2, r0); /* r0 = d0 c0 b0 a0 = R0 */ \ + punpckhdq_r2r(r2, r1); /* r1 = d1 c1 b1 a1 = R1 */ \ + movq_r2r(r4, r2); /* r2 = b3 a3 b2 a2 */ \ + movq_r2m(r0, *I(0)); \ + punpckhwd_r2r(r3, r5); /* r5 = d3 c3 d2 c2 */ \ + movq_r2m(r1, *I(1)); \ + punpckhdq_r2r(r5, r4); /* r4 = d3 c3 b3 a3 = R3 */ \ + punpckldq_r2r(r5, r2); /* r2 = d2 c2 b2 a2 = R2 */ \ + movq_r2m(r4, *I(3)); \ + movq_r2m(r2, *I(2)); \ +} + +void ff_vp3_dsp_init_mmx(void) +{ + int j = 16; + uint16_t *p; + + j = 1; + do { + p = idct_constants + ((j + 3) << 2); + p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1]; + } while (++j <= 7); + + idct_constants[44] = idct_constants[45] = + idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift; +} + +void ff_vp3_idct_mmx(int16_t *output_data) +{ + /* eax = quantized input + * ebx = dequantizer matrix 
+ * ecx = IDCT constants + * M(I) = ecx + MaskOffset(0) + I * 8 + * C(I) = ecx + CosineOffset(32) + (I-1) * 8 + * edx = output + * r0..r7 = mm0..mm7 + */ + +#define C(x) (idct_constants + 16 + (x - 1) * 4) +#define Eight (idct_constants + 44) + + /* at this point, function has completed dequantization + dezigzag + + * partial transposition; now do the idct itself */ +#define I(K) (output_data + K * 8) +#define J(K) (output_data + ((K - 4) * 8) + 4) + + RowIDCT(); + Transpose(); + +#undef I +#undef J +#define I(K) (output_data + (K * 8) + 32) +#define J(K) (output_data + ((K - 4) * 8) + 36) + + RowIDCT(); + Transpose(); + +#undef I +#undef J +#define I(K) (output_data + K * 8) +#define J(K) (output_data + K * 8) + + ColumnIDCT(); + +#undef I +#undef J +#define I(K) (output_data + (K * 8) + 4) +#define J(K) (output_data + (K * 8) + 4) + + ColumnIDCT(); + +#undef I +#undef J + +} diff --git a/mpeg4/src/libavcodec/i386/vp3dsp_sse2.c b/mpeg4/src/libavcodec/i386/vp3dsp_sse2.c new file mode 100644 index 0000000000000000000000000000000000000000..cf822f7d48c8a0f99a3d499e0bfb841233840374 --- /dev/null +++ b/mpeg4/src/libavcodec/i386/vp3dsp_sse2.c @@ -0,0 +1,825 @@ +/* + * Copyright (C) 2004 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file vp3dsp_sse2.c + * SSE2-optimized functions cribbed from the original VP3 source code. + */ + +#include "../dsputil.h" +#include "mmx.h" + +static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) = +{ + 0,65535,65535,0,0,0,0,0, // 0x0000 0000 0000 0000 0000 FFFF FFFF 0000 + 0,0,0,0,65535,65535,0,0, // 0x0000 0000 FFFF FFFF 0000 0000 0000 0000 + 65535,65535,65535,0,0,0,0,0,// 0x0000 0000 0000 0000 0000 FFFF FFFF FFFF + 0,0,0,65535,0,0,0,0, // 0x0000 0000 0000 0000 FFFF 0000 0000 0000 + 0,0,0,65535,65535,0,0,0, // 0x0000 0000 0000 FFFF FFFF 0000 0000 0000 + 65535,0,0,0,0,65535,0,0, // 0x0000 0000 FFFF 0000 0000 0000 0000 FFFF + 0,0,65535,65535, 0,0,0,0 // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000 +}; + +static DECLARE_ALIGNED_16(const unsigned int, eight_data[]) = +{ + 0x00080008, + 0x00080008, + 0x00080008, + 0x00080008 +}; + +static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) = +{ + 64277,64277,64277,64277,64277,64277,64277,64277, + 60547,60547,60547,60547,60547,60547,60547,60547, + 54491,54491,54491,54491,54491,54491,54491,54491, + 46341,46341,46341,46341,46341,46341,46341,46341, + 36410,36410,36410,36410,36410,36410,36410,36410, + 25080,25080,25080,25080,25080,25080,25080,25080, + 12785,12785,12785,12785,12785,12785,12785,12785 +}; + + +#define SSE2_Column_IDCT() { \ + \ + movdqu_m2r(*I(3), xmm2); /* xmm2 = i3 */ \ + movdqu_m2r(*C(3), xmm6); /* xmm6 = c3 */ \ + \ + movdqu_r2r(xmm2, xmm4); /* xmm4 = i3 */ \ + movdqu_m2r(*I(5), xmm7); /* xmm7 = i5 */ \ + \ + pmulhw_r2r(xmm6, xmm4); /* xmm4 = c3 * i3 - i3 */ \ + movdqu_m2r(*C(5), xmm1); /* xmm1 = c5 */ \ + \ + pmulhw_r2r(xmm7, xmm6); /* xmm6 = c3 * i5 - i5 */ \ + movdqu_r2r(xmm1, xmm5); /* xmm5 = c5 */ \ + \ + pmulhw_r2r(xmm2, xmm1); /* 
xmm1 = c5 * i3 - i3 */ \ + movdqu_m2r(*I(1), xmm3); /* xmm3 = i1 */ \ + \ + pmulhw_r2r(xmm7, xmm5); /* xmm5 = c5 * i5 - i5 */ \ + movdqu_m2r(*C(1), xmm0); /* xmm0 = c1 */ \ + \ + /* all registers are in use */ \ + \ + paddw_r2r(xmm2, xmm4); /* xmm4 = c3 * i3 */ \ + paddw_r2r(xmm7, xmm6); /* xmm6 = c3 * i5 */ \ + \ + paddw_r2r(xmm1, xmm2); /* xmm2 = c5 * i3 */ \ + movdqu_m2r(*I(7), xmm1); /* xmm1 = i7 */ \ + \ + paddw_r2r(xmm5, xmm7); /* xmm7 = c5 * i5 */ \ + movdqu_r2r(xmm0, xmm5); /* xmm5 = c1 */ \ + \ + pmulhw_r2r(xmm3, xmm0); /* xmm0 = c1 * i1 - i1 */ \ + paddsw_r2r(xmm7, xmm4); /* xmm4 = c3 * i3 + c5 * i5 = C */ \ + \ + pmulhw_r2r(xmm1, xmm5); /* xmm5 = c1 * i7 - i7 */ \ + movdqu_m2r(*C(7), xmm7); /* xmm7 = c7 */ \ + \ + psubsw_r2r(xmm2, xmm6); /* xmm6 = c3 * i5 - c5 * i3 = D */ \ + paddw_r2r(xmm3, xmm0); /* xmm0 = c1 * i1 */ \ + \ + pmulhw_r2r(xmm7, xmm3); /* xmm3 = c7 * i1 */ \ + movdqu_m2r(*I(2), xmm2); /* xmm2 = i2 */ \ + \ + pmulhw_r2r(xmm1, xmm7); /* xmm7 = c7 * i7 */ \ + paddw_r2r(xmm1, xmm5); /* xmm5 = c1 * i7 */ \ + \ + movdqu_r2r(xmm2, xmm1); /* xmm1 = i2 */ \ + pmulhw_m2r(*C(2), xmm2); /* xmm2 = i2 * c2 -i2 */ \ + \ + psubsw_r2r(xmm5, xmm3); /* xmm3 = c7 * i1 - c1 * i7 = B */ \ + movdqu_m2r(*I(6), xmm5); /* xmm5 = i6 */ \ + \ + paddsw_r2r(xmm7, xmm0); /* xmm0 = c1 * i1 + c7 * i7 = A */ \ + movdqu_r2r(xmm5, xmm7); /* xmm7 = i6 */ \ + \ + psubsw_r2r(xmm4, xmm0); /* xmm0 = A - C */ \ + pmulhw_m2r(*C(2), xmm5); /* xmm5 = c2 * i6 - i6 */ \ + \ + paddw_r2r(xmm1, xmm2); /* xmm2 = i2 * c2 */ \ + pmulhw_m2r(*C(6), xmm1); /* xmm1 = c6 * i2 */ \ + \ + paddsw_r2r(xmm4, xmm4); /* xmm4 = C + C */ \ + paddsw_r2r(xmm0, xmm4); /* xmm4 = A + C = C. */ \ + \ + psubsw_r2r(xmm6, xmm3); /* xmm3 = B - D */ \ + paddw_r2r(xmm7, xmm5); /* xmm5 = c2 * i6 */ \ + \ + paddsw_r2r(xmm6, xmm6); /* xmm6 = D + D */ \ + pmulhw_m2r(*C(6), xmm7); /* xmm7 = c6 * i6 */ \ + \ + paddsw_r2r(xmm3, xmm6); /* xmm6 = B + D = D. */ \ + movdqu_r2m(xmm4, *I(1)); /* Save C. 
at I(1) */ \ + \ + psubsw_r2r(xmm5, xmm1); /* xmm1 = c6 * i2 - c2 * i6 = H */ \ + movdqu_m2r(*C(4), xmm4); /* xmm4 = c4 */ \ + \ + movdqu_r2r(xmm3, xmm5); /* xmm5 = B - D */ \ + pmulhw_r2r(xmm4, xmm3); /* xmm3 = ( c4 -1 ) * ( B - D ) */ \ + \ + paddsw_r2r(xmm2, xmm7); /* xmm7 = c2 * i2 + c6 * i6 = G */ \ + movdqu_r2m(xmm6, *I(2)); /* Save D. at I(2) */ \ + \ + movdqu_r2r(xmm0, xmm2); /* xmm2 = A - C */ \ + movdqu_m2r(*I(0), xmm6); /* xmm6 = i0 */ \ + \ + pmulhw_r2r(xmm4, xmm0); /* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */ \ + paddw_r2r(xmm3, xmm5); /* xmm5 = c4 * ( B - D ) = B. */ \ + \ + movdqu_m2r(*I(4), xmm3); /* xmm3 = i4 */ \ + psubsw_r2r(xmm1, xmm5); /* xmm5 = B. - H = B.. */ \ + \ + paddw_r2r(xmm0, xmm2); /* xmm2 = c4 * ( A - C) = A. */ \ + psubsw_r2r(xmm3, xmm6); /* xmm6 = i0 - i4 */ \ + \ + movdqu_r2r(xmm6, xmm0); /* xmm0 = i0 - i4 */ \ + pmulhw_r2r(xmm4, xmm6); /* xmm6 = (c4 - 1) * (i0 - i4) = F */ \ + \ + paddsw_r2r(xmm3, xmm3); /* xmm3 = i4 + i4 */ \ + paddsw_r2r(xmm1, xmm1); /* xmm1 = H + H */ \ + \ + paddsw_r2r(xmm0, xmm3); /* xmm3 = i0 + i4 */ \ + paddsw_r2r(xmm5, xmm1); /* xmm1 = B. + H = H. */ \ + \ + pmulhw_r2r(xmm3, xmm4); /* xmm4 = ( c4 - 1 ) * ( i0 + i4 ) */ \ + paddw_r2r(xmm0, xmm6); /* xmm6 = c4 * ( i0 - i4 ) */ \ + \ + psubsw_r2r(xmm2, xmm6); /* xmm6 = F - A. = F. */ \ + paddsw_r2r(xmm2, xmm2); /* xmm2 = A. + A. */ \ + \ + movdqu_m2r(*I(1), xmm0); /* Load C. from I(1) */ \ + paddsw_r2r(xmm6, xmm2); /* xmm2 = F + A. = A.. */ \ + \ + paddw_r2r(xmm3, xmm4); /* xmm4 = c4 * ( i0 + i4 ) = 3 */ \ + psubsw_r2r(xmm1, xmm2); /* xmm2 = A.. - H. = R2 */ \ + \ + paddsw_m2r(*Eight, xmm2); /* Adjust R2 and R1 before shifting */ \ + paddsw_r2r(xmm1, xmm1); /* xmm1 = H. + H. */ \ + \ + paddsw_r2r(xmm2, xmm1); /* xmm1 = A.. + H. = R1 */ \ + psraw_i2r(4, xmm2); /* xmm2 = op2 */ \ + \ + psubsw_r2r(xmm7, xmm4); /* xmm4 = E - G = E. */ \ + psraw_i2r(4, xmm1); /* xmm1 = op1 */ \ + \ + movdqu_m2r(*I(2), xmm3); /* Load D. 
from I(2) */ \ + paddsw_r2r(xmm7, xmm7); /* xmm7 = G + G */ \ + \ + movdqu_r2m(xmm2, *O(2)); /* Write out op2 */ \ + paddsw_r2r(xmm4, xmm7); /* xmm7 = E + G = G. */ \ + \ + movdqu_r2m(xmm1, *O(1)); /* Write out op1 */ \ + psubsw_r2r(xmm3, xmm4); /* xmm4 = E. - D. = R4 */ \ + \ + paddsw_m2r(*Eight, xmm4); /* Adjust R4 and R3 before shifting */ \ + paddsw_r2r(xmm3, xmm3); /* xmm3 = D. + D. */ \ + \ + paddsw_r2r(xmm4, xmm3); /* xmm3 = E. + D. = R3 */ \ + psraw_i2r(4, xmm4); /* xmm4 = op4 */ \ + \ + psubsw_r2r(xmm5, xmm6); /* xmm6 = F. - B..= R6 */ \ + psraw_i2r(4, xmm3); /* xmm3 = op3 */ \ + \ + paddsw_m2r(*Eight, xmm6); /* Adjust R6 and R5 before shifting */ \ + paddsw_r2r(xmm5, xmm5); /* xmm5 = B.. + B.. */ \ + \ + paddsw_r2r(xmm6, xmm5); /* xmm5 = F. + B.. = R5 */ \ + psraw_i2r(4, xmm6); /* xmm6 = op6 */ \ + \ + movdqu_r2m(xmm4, *O(4)); /* Write out op4 */ \ + psraw_i2r(4, xmm5); /* xmm5 = op5 */ \ + \ + movdqu_r2m(xmm3, *O(3)); /* Write out op3 */ \ + psubsw_r2r(xmm0, xmm7); /* xmm7 = G. - C. = R7 */ \ + \ + paddsw_m2r(*Eight, xmm7); /* Adjust R7 and R0 before shifting */ \ + paddsw_r2r(xmm0, xmm0); /* xmm0 = C. + C. */ \ + \ + paddsw_r2r(xmm7, xmm0); /* xmm0 = G. + C. 
*/ \ + psraw_i2r(4, xmm7); /* xmm7 = op7 */ \ + \ + movdqu_r2m(xmm6, *O(6)); /* Write out op6 */ \ + psraw_i2r(4, xmm0); /* xmm0 = op0 */ \ + \ + movdqu_r2m(xmm5, *O(5)); /* Write out op5 */ \ + movdqu_r2m(xmm7, *O(7)); /* Write out op7 */ \ + \ + movdqu_r2m(xmm0, *O(0)); /* Write out op0 */ \ + \ +} /* End of SSE2_Column_IDCT macro */ + + +#define SSE2_Row_IDCT() { \ + \ + movdqu_m2r(*I(3), xmm2); /* xmm2 = i3 */ \ + movdqu_m2r(*C(3), xmm6); /* xmm6 = c3 */ \ + \ + movdqu_r2r(xmm2, xmm4); /* xmm4 = i3 */ \ + movdqu_m2r(*I(5), xmm7); /* xmm7 = i5 */ \ + \ + pmulhw_r2r(xmm6, xmm4); /* xmm4 = c3 * i3 - i3 */ \ + movdqu_m2r(*C(5), xmm1); /* xmm1 = c5 */ \ + \ + pmulhw_r2r(xmm7, xmm6); /* xmm6 = c3 * i5 - i5 */ \ + movdqu_r2r(xmm1, xmm5); /* xmm5 = c5 */ \ + \ + pmulhw_r2r(xmm2, xmm1); /* xmm1 = c5 * i3 - i3 */ \ + movdqu_m2r(*I(1), xmm3); /* xmm3 = i1 */ \ + \ + pmulhw_r2r(xmm7, xmm5); /* xmm5 = c5 * i5 - i5 */ \ + movdqu_m2r(*C(1), xmm0); /* xmm0 = c1 */ \ + \ + /* all registers are in use */ \ + \ + paddw_r2r(xmm2, xmm4); /* xmm4 = c3 * i3 */ \ + paddw_r2r(xmm7, xmm6); /* xmm6 = c3 * i5 */ \ + \ + paddw_r2r(xmm1, xmm2); /* xmm2 = c5 * i3 */ \ + movdqu_m2r(*I(7), xmm1); /* xmm1 = i7 */ \ + \ + paddw_r2r(xmm5, xmm7); /* xmm7 = c5 * i5 */ \ + movdqu_r2r(xmm0, xmm5); /* xmm5 = c1 */ \ + \ + pmulhw_r2r(xmm3, xmm0); /* xmm0 = c1 * i1 - i1 */ \ + paddsw_r2r(xmm7, xmm4); /* xmm4 = c3 * i3 + c5 * i5 = C */ \ + \ + pmulhw_r2r(xmm1, xmm5); /* xmm5 = c1 * i7 - i7 */ \ + movdqu_m2r(*C(7), xmm7); /* xmm7 = c7 */ \ + \ + psubsw_r2r(xmm2, xmm6); /* xmm6 = c3 * i5 - c5 * i3 = D */ \ + paddw_r2r(xmm3, xmm0); /* xmm0 = c1 * i1 */ \ + \ + pmulhw_r2r(xmm7, xmm3); /* xmm3 = c7 * i1 */ \ + movdqu_m2r(*I(2), xmm2); /* xmm2 = i2 */ \ + \ + pmulhw_r2r(xmm1, xmm7); /* xmm7 = c7 * i7 */ \ + paddw_r2r(xmm1, xmm5); /* xmm5 = c1 * i7 */ \ + \ + movdqu_r2r(xmm2, xmm1); /* xmm1 = i2 */ \ + pmulhw_m2r(*C(2), xmm2); /* xmm2 = i2 * c2 -i2 */ \ + \ + psubsw_r2r(xmm5, xmm3); /* xmm3 = c7 * i1 - c1 * i7 
= B */ \ + movdqu_m2r(*I(6), xmm5); /* xmm5 = i6 */ \ + \ + paddsw_r2r(xmm7, xmm0); /* xmm0 = c1 * i1 + c7 * i7 = A */ \ + movdqu_r2r(xmm5, xmm7); /* xmm7 = i6 */ \ + \ + psubsw_r2r(xmm4, xmm0); /* xmm0 = A - C */ \ + pmulhw_m2r(*C(2), xmm5); /* xmm5 = c2 * i6 - i6 */ \ + \ + paddw_r2r(xmm1, xmm2); /* xmm2 = i2 * c2 */ \ + pmulhw_m2r(*C(6), xmm1); /* xmm1 = c6 * i2 */ \ + \ + paddsw_r2r(xmm4, xmm4); /* xmm4 = C + C */ \ + paddsw_r2r(xmm0, xmm4); /* xmm4 = A + C = C. */ \ + \ + psubsw_r2r(xmm6, xmm3); /* xmm3 = B - D */ \ + paddw_r2r(xmm7, xmm5); /* xmm5 = c2 * i6 */ \ + \ + paddsw_r2r(xmm6, xmm6); /* xmm6 = D + D */ \ + pmulhw_m2r(*C(6), xmm7); /* xmm7 = c6 * i6 */ \ + \ + paddsw_r2r(xmm3, xmm6); /* xmm6 = B + D = D. */ \ + movdqu_r2m(xmm4, *I(1)); /* Save C. at I(1) */ \ + \ + psubsw_r2r(xmm5, xmm1); /* xmm1 = c6 * i2 - c2 * i6 = H */ \ + movdqu_m2r(*C(4), xmm4); /* xmm4 = c4 */ \ + \ + movdqu_r2r(xmm3, xmm5); /* xmm5 = B - D */ \ + pmulhw_r2r(xmm4, xmm3); /* xmm3 = ( c4 -1 ) * ( B - D ) */ \ + \ + paddsw_r2r(xmm2, xmm7); /* xmm7 = c2 * i2 + c6 * i6 = G */ \ + movdqu_r2m(xmm6, *I(2)); /* Save D. at I(2) */ \ + \ + movdqu_r2r(xmm0, xmm2); /* xmm2 = A - C */ \ + movdqu_m2r(*I(0), xmm6); /* xmm6 = i0 */ \ + \ + pmulhw_r2r(xmm4, xmm0); /* xmm0 = ( c4 - 1 ) * ( A - C ) = A. */ \ + paddw_r2r(xmm3, xmm5); /* xmm5 = c4 * ( B - D ) = B. */ \ + \ + movdqu_m2r(*I(4), xmm3); /* xmm3 = i4 */ \ + psubsw_r2r(xmm1, xmm5); /* xmm5 = B. - H = B.. */ \ + \ + paddw_r2r(xmm0, xmm2); /* xmm2 = c4 * ( A - C) = A. */ \ + psubsw_r2r(xmm3, xmm6); /* xmm6 = i0 - i4 */ \ + \ + movdqu_r2r(xmm6, xmm0); /* xmm0 = i0 - i4 */ \ + pmulhw_r2r(xmm4, xmm6); /* xmm6 = ( c4 - 1 ) * ( i0 - i4 ) = F */ \ + \ + paddsw_r2r(xmm3, xmm3); /* xmm3 = i4 + i4 */ \ + paddsw_r2r(xmm1, xmm1); /* xmm1 = H + H */ \ + \ + paddsw_r2r(xmm0, xmm3); /* xmm3 = i0 + i4 */ \ + paddsw_r2r(xmm5, xmm1); /* xmm1 = B. + H = H. 
*/ \ + \ + pmulhw_r2r(xmm3, xmm4); /* xmm4 = ( c4 - 1 ) * ( i0 + i4 ) */ \ + paddw_r2r(xmm0, xmm6); /* xmm6 = c4 * ( i0 - i4 ) */ \ + \ + psubsw_r2r(xmm2, xmm6); /* xmm6 = F - A. = F. */ \ + paddsw_r2r(xmm2, xmm2); /* xmm2 = A. + A. */ \ + \ + movdqu_m2r(*I(1), xmm0); /* Load C. from I(1) */ \ + paddsw_r2r(xmm6, xmm2); /* xmm2 = F + A. = A.. */ \ + \ + paddw_r2r(xmm3, xmm4); /* xmm4 = c4 * ( i0 + i4 ) = 3 */ \ + psubsw_r2r(xmm1, xmm2); /* xmm2 = A.. - H. = R2 */ \ + \ + paddsw_r2r(xmm1, xmm1); /* xmm1 = H. + H. */ \ + paddsw_r2r(xmm2, xmm1); /* xmm1 = A.. + H. = R1 */ \ + \ + psubsw_r2r(xmm7, xmm4); /* xmm4 = E - G = E. */ \ + \ + movdqu_m2r(*I(2), xmm3); /* Load D. from I(2) */ \ + paddsw_r2r(xmm7, xmm7); /* xmm7 = G + G */ \ + \ + movdqu_r2m(xmm2, *I(2)); /* Write out op2 */ \ + paddsw_r2r(xmm4, xmm7); /* xmm7 = E + G = G. */ \ + \ + movdqu_r2m(xmm1, *I(1)); /* Write out op1 */ \ + psubsw_r2r(xmm3, xmm4); /* xmm4 = E. - D. = R4 */ \ + \ + paddsw_r2r(xmm3, xmm3); /* xmm3 = D. + D. */ \ + \ + paddsw_r2r(xmm4, xmm3); /* xmm3 = E. + D. = R3 */ \ + \ + psubsw_r2r(xmm5, xmm6); /* xmm6 = F. - B..= R6 */ \ + \ + paddsw_r2r(xmm5, xmm5); /* xmm5 = B.. + B.. */ \ + \ + paddsw_r2r(xmm6, xmm5); /* xmm5 = F. + B.. = R5 */ \ + \ + movdqu_r2m(xmm4, *I(4)); /* Write out op4 */ \ + \ + movdqu_r2m(xmm3, *I(3)); /* Write out op3 */ \ + psubsw_r2r(xmm0, xmm7); /* xmm7 = G. - C. = R7 */ \ + \ + paddsw_r2r(xmm0, xmm0); /* xmm0 = C. + C. */ \ + \ + paddsw_r2r(xmm7, xmm0); /* xmm0 = G. + C. 
*/ \ + \ + movdqu_r2m(xmm6, *I(6)); /* Write out op6 */ \ + \ + movdqu_r2m(xmm5, *I(5)); /* Write out op5 */ \ + movdqu_r2m(xmm7, *I(7)); /* Write out op7 */ \ + \ + movdqu_r2m(xmm0, *I(0)); /* Write out op0 */ \ + \ +} /* End of SSE2_Row_IDCT macro */ + + +#define SSE2_Transpose() { \ + \ + movdqu_m2r(*I(4), xmm4); /* xmm4=e7e6e5e4e3e2e1e0 */ \ + movdqu_m2r(*I(5), xmm0); /* xmm0=f7f6f5f4f3f2f1f0 */ \ + \ + movdqu_r2r(xmm4, xmm5); /* make a copy */ \ + punpcklwd_r2r(xmm0, xmm4); /* xmm4=f3e3f2e2f1e1f0e0 */ \ + \ + punpckhwd_r2r(xmm0, xmm5); /* xmm5=f7e7f6e6f5e5f4e4 */ \ + movdqu_m2r(*I(6), xmm6); /* xmm6=g7g6g5g4g3g2g1g0 */ \ + \ + movdqu_m2r(*I(7), xmm0); /* xmm0=h7h6h5h4h3h2h1h0 */ \ + movdqu_r2r(xmm6, xmm7); /* make a copy */ \ + \ + punpcklwd_r2r(xmm0, xmm6); /* xmm6=h3g3h2g2h1g1h0g0 */ \ + punpckhwd_r2r(xmm0, xmm7); /* xmm7=h7g7h6g6h5g5h4g4 */ \ + \ + movdqu_r2r(xmm4, xmm3); /* make a copy */ \ + punpckldq_r2r(xmm6, xmm4); /* xmm4=h1g1f1e1h0g0f0e0 */ \ + \ + punpckhdq_r2r(xmm6, xmm3); /* xmm3=h3g3f3e3h2g2f2e2 */ \ + movdqu_r2m(xmm3, *I(6)); /* save h3g3f3e3h2g2f2e2 (I(6) used as scratch) */ \ + /* Free xmm6 */ \ + movdqu_r2r(xmm5, xmm6); /* make a copy */ \ + punpckldq_r2r(xmm7, xmm5); /* xmm5=h5g5f5e5h4g4f4e4 */ \ + \ + punpckhdq_r2r(xmm7, xmm6); /* xmm6=h7g7f7e7h6g6f6e6 */ \ + movdqu_m2r(*I(0), xmm0); /* xmm0=a7a6a5a4a3a2a1a0 */ \ + /* Free xmm7 */ \ + movdqu_m2r(*I(1), xmm1); /* xmm1=b7b6b5b4b3b2b1b0 */ \ + movdqu_r2r(xmm0, xmm7); /* make a copy */ \ + \ + punpcklwd_r2r(xmm1, xmm0); /* xmm0=b3a3b2a2b1a1b0a0 */ \ + punpckhwd_r2r(xmm1, xmm7); /* xmm7=b7a7b6a6b5a5b4a4 */ \ + /* Free xmm1 */ \ + movdqu_m2r(*I(2), xmm2); /* xmm2=c7c6c5c4c3c2c1c0 */ \ + movdqu_m2r(*I(3), xmm3); /* xmm3=d7d6d5d4d3d2d1d0 */ \ + \ + movdqu_r2r(xmm2, xmm1); /* make a copy */ \ + punpcklwd_r2r(xmm3, xmm2); /* xmm2=d3c3d2c2d1c1d0c0 */ \ + \ + punpckhwd_r2r(xmm3, xmm1); /* xmm1=d7c7d6c6d5c5d4c4 */ \ + movdqu_r2r(xmm0, xmm3); /* make a copy */ \ + \ + punpckldq_r2r(xmm2, xmm0); /* xmm0=d1c1b1a1d0c0b0a0 */ \ +
punpckhdq_r2r(xmm2, xmm3); /* xmm3=d3c3b3a3d2c2b2a2 */ \ + /* Free xmm2 */ \ + movdqu_r2r(xmm7, xmm2); /* make a copy */ \ + punpckldq_r2r(xmm1, xmm2); /* xmm2=d5c5b5a5d4c4b4a4 */ \ + \ + punpckhdq_r2r(xmm1, xmm7); /* xmm7=d7c7b7a7d6c6b6a6 */ \ + movdqu_r2r(xmm0, xmm1); /* make a copy */ \ + \ + punpcklqdq_r2r(xmm4, xmm0); /* xmm0=h0g0f0e0d0c0b0a0 */ \ + punpckhqdq_r2r(xmm4, xmm1); /* xmm1=h1g1f1e1d1c1b1a1 */ \ + \ + movdqu_r2m(xmm0, *I(0)); /* save I(0) */ \ + movdqu_r2m(xmm1, *I(1)); /* save I(1) */ \ + \ + movdqu_m2r(*I(6), xmm0); /* load h3g3f3e3h2g2f2e2 */ \ + movdqu_r2r(xmm3, xmm1); /* make a copy */ \ + \ + punpcklqdq_r2r(xmm0, xmm1); /* xmm1=h2g2f2e2d2c2b2a2 */ \ + punpckhqdq_r2r(xmm0, xmm3); /* xmm3=h3g3f3e3d3c3b3a3 */ \ + \ + movdqu_r2r(xmm2, xmm4); /* make a copy */ \ + punpcklqdq_r2r(xmm5, xmm4); /* xmm4=h4g4f4e4d4c4b4a4 */ \ + \ + punpckhqdq_r2r(xmm5, xmm2); /* xmm2=h5g5f5e5d5c5b5a5 */ \ + movdqu_r2m(xmm1, *I(2)); /* save I(2) */ \ + \ + movdqu_r2m(xmm3, *I(3)); /* save I(3) */ \ + movdqu_r2m(xmm4, *I(4)); /* save I(4) */ \ + \ + movdqu_r2m(xmm2, *I(5)); /* save I(5) */ \ + movdqu_r2r(xmm7, xmm5); /* make a copy */ \ + \ + punpcklqdq_r2r(xmm6, xmm5); /* xmm5=h6g6f6e6d6c6b6a6 */ \ + punpckhqdq_r2r(xmm6, xmm7); /* xmm7=h7g7f7e7h7c7b7a7 -> h7g7f7e7d7c7b7a7 */ \ + \ + movdqu_r2m(xmm5, *I(6)); /* save I(6) */ \ + movdqu_r2m(xmm7, *I(7)); /* save I(7) */ \ + \ +} /* End of SSE2_Transpose macro: in-place 8x8 transpose of the eight 8-word rows at I(0)..I(7) */ + + +#define SSE2_Dequantize() { \ + movdqu_m2r(*(eax), xmm0); \ + \ + pmullw_m2r(*(ebx), xmm0); /* xmm0 = 07 06 05 04 03 02 01 00 */ \ + movdqu_m2r(*(eax + 16), xmm1); \ + \ + pmullw_m2r(*(ebx + 16), xmm1); /* xmm1 = 17 16 15 14 13 12 11 10 */ \ + pshuflw_r2r(xmm0, xmm3, 0x078); /* xmm3 = 07 06 05 04 01 03 02 00 */ \ + \ + movdqu_r2r(xmm1, xmm2); /* xmm2 = 17 16 15 14 13 12 11 10 */ \ + movdqu_m2r(*(ecx), xmm7); /* xmm7 = -- -- -- -- -- FF FF -- */ \ + \ + movdqu_m2r(*(eax + 32), xmm4); \ + movdqu_m2r(*(eax + 64), xmm5); \ + \ + pmullw_m2r(*(ebx + 32), xmm4); /* xmm4 = 27 26 25 24 23 22 21
20 */ \ + pmullw_m2r(*(ebx + 64), xmm5); /* xmm5 = 47 46 45 44 43 42 41 40 */ \ + \ + movdqu_m2r(*(ecx + 16), xmm6); /* xmm6 = -- -- FF FF -- -- -- -- */ \ + pand_r2r(xmm2, xmm7); /* xmm7 = -- -- -- -- -- 12 11 -- */ \ + \ + pand_r2r(xmm4, xmm6); /* xmm6 = -- -- 25 24 -- -- -- -- */ \ + pxor_r2r(xmm7, xmm2); /* xmm2 = 17 16 15 14 13 -- -- 10 */ \ + \ + pxor_r2r(xmm6, xmm4); /* xmm4 = 27 26 -- -- 23 22 21 20 */ \ + pslldq_i2r(4, xmm7); /* xmm7 = -- -- -- 12 11 -- -- -- */ \ + \ + pslldq_i2r(2, xmm6); /* xmm6 = -- 25 24 -- -- -- -- -- */ \ + por_r2r(xmm6, xmm7); /* xmm7 = -- 25 24 12 11 -- -- -- */ \ + \ + movdqu_m2r(*(ecx + 32), xmm0); /* xmm0 = -- -- -- -- -- FF FF FF */ \ + movdqu_m2r(*(ecx + 48), xmm6); /* xmm6 = -- -- -- -- FF -- -- -- */ \ + \ + pand_r2r(xmm3, xmm0); /* xmm0 = -- -- -- -- -- 03 02 00 */ \ + pand_r2r(xmm5, xmm6); /* xmm6 = -- -- -- -- 43 -- -- -- */ \ + \ + pxor_r2r(xmm0, xmm3); /* xmm3 = 07 06 05 04 01 -- -- -- */ \ + pxor_r2r(xmm6, xmm5); /* xmm5 = 47 46 45 44 -- 42 41 40 */ \ + \ + por_r2r(xmm7, xmm0); /* xmm0 = -- 25 24 12 11 03 02 00 */ \ + pslldq_i2r(8, xmm6); /* xmm6 = 43 -- -- -- -- -- -- -- */ \ + \ + por_r2r(xmm6, xmm0); /* xmm0 = 43 25 24 12 11 03 02 00 */ \ + /* 02345 in use */ \ + \ + movdqu_m2r(*(ecx + 64 ), xmm1); /* xmm1 = -- -- -- FF FF -- -- -- */ \ + pshuflw_r2r(xmm5, xmm5, 0x0B4); /* xmm5 = 47 46 45 44 42 -- 41 40 */ \ + \ + movdqu_r2r(xmm1, xmm7); /* xmm7 = -- -- -- FF FF -- -- -- */ \ + movdqu_r2r(xmm1, xmm6); /* xmm6 = -- -- -- FF FF -- -- -- */ \ + \ + movdqu_r2m(xmm0, *(eax)); /* write 43 25 24 12 11 03 02 00 */ \ + pshufhw_r2r(xmm4, xmm4, 0x0C2); /* xmm4 = 27 -- -- 26 23 22 21 20 */ \ + \ + pand_r2r(xmm4, xmm7); /* xmm7 = -- -- -- 26 23 -- -- -- */ \ + pand_r2r(xmm5, xmm1); /* xmm1 = -- -- -- 44 42 -- -- -- */ \ + \ + pxor_r2r(xmm7, xmm4); /* xmm4 = 27 -- -- -- -- 22 21 20 */ \ + pxor_r2r(xmm1, xmm5); /* xmm5 = 47 46 45 -- -- -- 41 40 */ \ + \ + pshuflw_r2r(xmm2, xmm2, 0x0C6); /* xmm2 = 17 16 15 14 13 10 -- -- */ \ + 
movdqu_r2r(xmm6, xmm0); /* xmm0 = -- -- -- FF FF -- -- -- */ \ + \ + pslldq_i2r(2, xmm7); /* xmm7 = -- -- 26 23 -- -- -- -- */ \ + pslldq_i2r(6, xmm1); /* xmm1 = 44 42 -- -- -- -- -- -- */ \ + \ + psrldq_i2r(2, xmm0); /* xmm0 = -- -- -- -- FF FF -- -- */ \ + pand_r2r(xmm3, xmm6); /* xmm6 = -- -- -- 04 01 -- -- -- */ \ + \ + pand_r2r(xmm2, xmm0); /* xmm0 = -- -- -- -- 13 10 -- -- */ \ + pxor_r2r(xmm6, xmm3); /* xmm3 = 07 06 05 -- -- -- -- -- */ \ + \ + pxor_r2r(xmm0, xmm2); /* xmm2 = 17 16 15 14 -- -- -- -- */ \ + psrldq_i2r(6, xmm6); /* xmm6 = -- -- -- -- -- -- 04 01 */ \ + \ + por_r2r(xmm7, xmm1); /* xmm1 = 44 42 26 23 -- -- -- -- */ \ + por_r2r(xmm6, xmm0); /* xmm0 = -- -- -- -- 13 10 04 01 */ \ + /* 12345 in use */ \ + por_r2r(xmm0, xmm1); /* xmm1 = 44 42 26 23 13 10 04 01 */ \ + pshuflw_r2r(xmm4, xmm4, 0x093); /* xmm4 = 27 -- -- -- 22 21 20 -- */ \ + \ + pshufhw_r2r(xmm4, xmm4, 0x093); /* xmm4 = -- -- -- 27 22 21 20 -- */ \ + movdqu_r2m(xmm1, *(eax + 16)); /* write 44 42 26 23 13 10 04 01 */ \ + \ + pshufhw_r2r(xmm3, xmm3, 0x0D2); /* xmm3 = 07 05 -- 06 -- -- -- -- */ \ + movdqu_m2r(*(ecx + 64), xmm0); /* xmm0 = -- -- -- FF FF -- -- -- */ \ + \ + pand_r2r(xmm3, xmm0); /* xmm0 = -- -- -- 06 -- -- -- -- */ \ + psrldq_i2r(12, xmm3); /* xmm3 = -- -- -- -- -- -- 07 05 */ \ + \ + psrldq_i2r(8, xmm0); /* xmm0 = -- -- -- -- -- -- -- 06 */ \ + \ + movdqu_m2r(*(ecx + 64), xmm6); /* xmm6 = -- -- -- FF FF -- -- -- */ \ + movdqu_m2r(*(ecx + 96), xmm7); /* xmm7 = -- -- -- -- FF FF -- -- */ \ + \ + pand_r2r(xmm4, xmm6); /* xmm6 = -- -- -- 27 22 -- -- -- */ \ + pxor_r2r(xmm6, xmm4); /* xmm4 = -- -- -- -- -- 21 20 -- */ \ + \ + por_r2r(xmm6, xmm3); /* xmm3 = -- -- -- 27 22 -- 07 05 */ \ + pand_r2r(xmm4, xmm7); /* xmm7 = -- -- -- -- -- 21 -- -- */ \ + \ + por_r2r(xmm7, xmm0); /* xmm0 = -- -- -- -- -- 21 -- 06 */ \ + pxor_r2r(xmm7, xmm4); /* xmm4 = -- -- -- -- -- -- 20 -- */ \ + \ + movdqu_m2r(*(ecx + 16 ), xmm6); /* xmm6 = -- -- FF FF -- -- -- -- */ \ + movdqu_m2r(*(ecx + 64 ),
xmm1); /* xmm1 = -- -- -- FF FF -- -- -- */ \ + \ + pand_r2r(xmm2, xmm6); /* xmm6 = -- -- 15 14 -- -- -- -- */ \ + pand_r2r(xmm6, xmm1); /* xmm1 = -- -- -- 14 -- -- -- -- */ \ + \ + pxor_r2r(xmm6, xmm2); /* xmm2 = 17 16 -- -- -- -- -- -- */ \ + pxor_r2r(xmm1, xmm6); /* xmm6 = -- -- 15 -- -- -- -- -- */ \ + \ + psrldq_i2r(4, xmm1); /* xmm1 = -- -- -- -- -- 14 -- -- */ \ + \ + psrldq_i2r(8, xmm6); /* xmm6 = -- -- -- -- -- -- 15 -- */ \ + por_r2r(xmm1, xmm3); /* xmm3 = -- -- -- 27 22 14 07 05 */ \ + \ + por_r2r(xmm6, xmm0); /* xmm0 = -- -- -- -- -- 21 15 06 */ \ + pshufhw_r2r(xmm5, xmm5, 0x0E1); /* xmm5 = 47 46 -- 45 -- -- 41 40 */ \ + \ + movdqu_m2r(*(ecx + 64), xmm1); /* xmm1 = -- -- -- FF FF -- -- -- */ \ + pshuflw_r2r(xmm5, xmm5, 0x072); /* xmm5 = 47 46 -- 45 41 -- 40 -- */ \ + \ + movdqu_r2r(xmm1, xmm6); /* xmm6 = -- -- -- FF FF -- -- -- */ \ + pand_r2r(xmm5, xmm1); /* xmm1 = -- -- -- 45 41 -- -- -- */ \ + \ + pxor_r2r(xmm1, xmm5); /* xmm5 = 47 46 -- -- -- -- 40 -- */ \ + pslldq_i2r(4, xmm1); /* xmm1 = -- 45 41 -- -- -- -- -- */ \ + \ + pshufd_r2r(xmm5, xmm5, 0x09C); /* xmm5 = -- -- -- -- 47 46 40 -- */ \ + por_r2r(xmm1, xmm3); /* xmm3 = -- 45 41 27 22 14 07 05 */ \ + \ + movdqu_m2r(*(eax + 96), xmm1); /* xmm1 = 67 66 65 64 63 62 61 60 */ \ + pmullw_m2r(*(ebx + 96), xmm1); \ + \ + movdqu_m2r(*(ecx), xmm7); /* xmm7 = -- -- -- -- -- FF FF -- */ \ + \ + psrldq_i2r(8, xmm6); /* xmm6 = -- -- -- -- -- -- -- FF */ \ + pand_r2r(xmm5, xmm7); /* xmm7 = -- -- -- -- -- 46 40 -- */ \ + \ + pand_r2r(xmm1, xmm6); /* xmm6 = -- -- -- -- -- -- -- 60 */ \ + pxor_r2r(xmm7, xmm5); /* xmm5 = -- -- -- -- 47 -- -- -- */ \ + \ + pxor_r2r(xmm6, xmm1); /* xmm1 = 67 66 65 64 63 62 61 -- */ \ + pslldq_i2r(2, xmm5); /* xmm5 = -- -- -- 47 -- -- -- -- */ \ + \ + pslldq_i2r(14, xmm6); /* xmm6 = 60 -- -- -- -- -- -- -- */ \ + por_r2r(xmm5, xmm4); /* xmm4 = -- -- -- 47 -- -- 20 -- */ \ + \ + por_r2r(xmm6, xmm3); /* xmm3 = 60 45 41 27 22 14 07 05 */ \ + pslldq_i2r(6, xmm7); /* xmm7 = -- -- 46 40 -- 
-- -- -- */ \ + \ + movdqu_r2m(xmm3, *(eax+32)); /* write 60 45 41 27 22 14 07 05 */ \ + por_r2r(xmm7, xmm0); /* xmm0 = -- -- 46 40 -- 21 15 06 */ \ + /* 0, 1, 2, 4 in use */ \ + movdqu_m2r(*(eax + 48), xmm3); /* xmm3 = 37 36 35 34 33 32 31 30 */ \ + movdqu_m2r(*(eax + 80), xmm5); /* xmm5 = 57 56 55 54 53 52 51 50 */ \ + \ + pmullw_m2r(*(ebx + 48), xmm3); \ + pmullw_m2r(*(ebx + 80), xmm5); \ + \ + movdqu_m2r(*(ecx + 64), xmm6); /* xmm6 = -- -- -- FF FF -- -- -- */ \ + movdqu_m2r(*(ecx + 64), xmm7); /* xmm7 = -- -- -- FF FF -- -- -- */ \ + \ + psrldq_i2r(8, xmm6); /* xmm6 = -- -- -- -- -- -- -- FF */ \ + pslldq_i2r(8, xmm7); /* xmm7 = FF -- -- -- -- -- -- -- */ \ + \ + pand_r2r(xmm3, xmm6); /* xmm6 = -- -- -- -- -- -- -- 30 */ \ + pand_r2r(xmm5, xmm7); /* xmm7 = 57 -- -- -- -- -- -- -- */ \ + \ + pxor_r2r(xmm6, xmm3); /* xmm3 = 37 36 35 34 33 32 31 -- */ \ + pxor_r2r(xmm7, xmm5); /* xmm5 = __ 56 55 54 53 52 51 50 */ \ + \ + pslldq_i2r(6, xmm6); /* xmm6 = -- -- -- -- 30 -- -- -- */ \ + psrldq_i2r(2, xmm7); /* xmm7 = -- 57 -- -- -- -- -- -- */ \ + \ + por_r2r(xmm7, xmm6); /* xmm6 = -- 57 -- -- 30 -- -- -- */ \ + movdqu_m2r(*(ecx), xmm7); /* xmm7 = -- -- -- -- -- FF FF -- */ \ + \ + por_r2r(xmm6, xmm0); /* xmm0 = -- 57 46 40 30 21 15 06 */ \ + psrldq_i2r(2, xmm7); /* xmm7 = -- -- -- -- -- -- FF FF */ \ + \ + movdqu_r2r(xmm2, xmm6); /* xmm6 = 17 16 -- -- -- -- -- -- */ \ + pand_r2r(xmm1, xmm7); /* xmm7 = -- -- -- -- -- -- 61 -- */ \ + \ + pslldq_i2r(2, xmm6); /* xmm6 = 16 -- -- -- -- -- -- -- */ \ + psrldq_i2r(14, xmm2); /* xmm2 = -- -- -- -- -- -- -- 17 */ \ + \ + pxor_r2r(xmm7, xmm1); /* xmm1 = 67 66 65 64 63 62 -- -- */ \ + pslldq_i2r(12, xmm7); /* xmm7 = 61 -- -- -- -- -- -- -- */ \ + \ + psrldq_i2r(14, xmm6); /* xmm6 = -- -- -- -- -- -- -- 16 */ \ + por_r2r(xmm6, xmm4); /* xmm4 = -- -- -- 47 -- -- 20 16 */ \ + \ + por_r2r(xmm7, xmm0); /* xmm0 = 61 57 46 40 30 21 15 06 */ \ + movdqu_m2r(*(ecx), xmm6); /* xmm6 = -- -- -- -- -- FF FF -- */ \ + \ + psrldq_i2r(2, xmm6); 
/* xmm6 = -- -- -- -- -- -- FF FF */ \ + movdqu_r2m(xmm0, *(eax+48)); /* write 61 57 46 40 30 21 15 06 */ \ + /* 1, 2, 3, 4, 5 in use */\ + movdqu_m2r(*(ecx), xmm0); /* xmm0 = -- -- -- -- -- FF FF -- */ \ + pand_r2r(xmm3, xmm6); /* xmm6 = -- -- -- -- -- -- 31 -- */ \ + \ + movdqu_r2r(xmm3, xmm7); /* xmm7 = 37 36 35 34 33 32 31 -- */ \ + pxor_r2r(xmm6, xmm3); /* xmm3 = 37 36 35 34 33 32 -- -- */ \ + \ + pslldq_i2r(2, xmm3); /* xmm3 = 36 35 34 33 32 -- -- -- */ \ + pand_r2r(xmm1, xmm0); /* xmm0 = -- -- -- -- -- 62 -- -- */ \ + \ + psrldq_i2r(14, xmm7); /* xmm7 = -- -- -- -- -- -- -- 37 */ \ + pxor_r2r(xmm0, xmm1); /* xmm1 = 67 66 65 64 63 -- -- -- */ \ + \ + por_r2r(xmm7, xmm6); /* xmm6 = -- -- -- -- -- -- 31 37 */ \ + movdqu_m2r(*(ecx + 64), xmm7); /* xmm7 = -- -- -- FF FF -- -- -- */ \ + \ + pshuflw_r2r(xmm6, xmm6, 0x01E); /* xmm6 = -- -- -- -- 37 31 -- -- */ \ + pslldq_i2r(6, xmm7); /* xmm7 = FF FF -- -- -- -- -- -- */ \ + \ + por_r2r(xmm6, xmm4); /* xmm4 = -- -- -- 47 37 31 20 16 */ \ + pand_r2r(xmm5, xmm7); /* xmm7 = -- 56 -- -- -- -- -- -- */ \ + \ + pslldq_i2r(8, xmm0); /* xmm0 = -- 62 -- -- -- -- -- -- */ \ + pxor_r2r(xmm7, xmm5); /* xmm5 = -- -- 55 54 53 52 51 50 */ \ + \ + psrldq_i2r(2, xmm7); /* xmm7 = -- -- 56 -- -- -- -- -- */ \ + \ + pshufhw_r2r(xmm3, xmm3, 0x087); /* xmm3 = 35 33 34 36 32 -- -- -- */ \ + por_r2r(xmm7, xmm0); /* xmm0 = -- 62 56 -- -- -- -- -- */ \ + \ + movdqu_m2r(*(eax + 112), xmm7); /* xmm7 = 77 76 75 74 73 72 71 70 */ \ + pmullw_m2r(*(ebx + 112), xmm7); \ + \ + movdqu_m2r(*(ecx + 64), xmm6); /* xmm6 = -- -- -- FF FF -- -- -- */ \ + por_r2r(xmm0, xmm4); /* xmm4 = -- 62 56 47 37 31 20 16 */ \ + \ + pshuflw_r2r(xmm7, xmm7, 0x0E1); /* xmm7 = 77 76 75 74 73 72 70 71 */ \ + psrldq_i2r(8, xmm6); /* xmm6 = -- -- -- -- -- -- -- FF */ \ + \ + movdqu_m2r(*(ecx + 64), xmm0); /* xmm0 = -- -- -- FF FF -- -- -- */ \ + pand_r2r(xmm7, xmm6); /* xmm6 = -- -- -- -- -- -- -- 71 */ \ + \ + pand_r2r(xmm3, xmm0); /* xmm0 = -- -- -- 36 32 -- -- -- */ \ + 
pxor_r2r(xmm6, xmm7); /* xmm7 = 77 76 75 74 73 72 70 -- */ \ + \ + pxor_r2r(xmm0, xmm3); /* xmm3 = 35 33 34 -- -- -- -- -- */ \ + pslldq_i2r(14, xmm6); /* xmm6 = 71 -- -- -- -- -- -- -- */ \ + \ + psrldq_i2r(4, xmm0); /* xmm0 = -- -- -- -- -- 36 32 -- */ \ + por_r2r(xmm6, xmm4); /* xmm4 = 71 62 56 47 37 31 20 16 */ \ + \ + por_r2r(xmm0, xmm2); /* xmm2 = -- -- -- -- -- 36 32 17 */ \ + movdqu_r2m(xmm4, *(eax + 64)); /* write 71 62 56 47 37 31 20 16 */ \ + /* 1, 2, 3, 5, 7 in use */ \ + movdqu_m2r(*(ecx + 80), xmm6); /* xmm6 = -- -- FF -- -- -- -- FF */ \ + pshufhw_r2r(xmm7, xmm7, 0x0D2); /* xmm7 = 77 75 74 76 73 72 70 __ */ \ + \ + movdqu_m2r(*(ecx), xmm4); /* xmm4 = -- -- -- -- -- FF FF -- */ \ + movdqu_m2r(*(ecx+48), xmm0); /* xmm0 = -- -- -- -- FF -- -- -- */ \ + \ + pand_r2r(xmm5, xmm6); /* xmm6 = -- -- 55 -- -- -- -- 50 */ \ + pand_r2r(xmm7, xmm4); /* xmm4 = -- -- -- -- -- 72 70 -- */ \ + \ + pand_r2r(xmm1, xmm0); /* xmm0 = -- -- -- -- 63 -- -- -- */ \ + pxor_r2r(xmm6, xmm5); /* xmm5 = -- -- -- 54 53 52 51 -- */ \ + \ + pxor_r2r(xmm4, xmm7); /* xmm7 = 77 75 74 76 73 -- -- -- */ \ + pxor_r2r(xmm0, xmm1); /* xmm1 = 67 66 65 64 -- -- -- -- */ \ + \ + pshuflw_r2r(xmm6, xmm6, 0x02B); /* xmm6 = -- -- 55 -- 50 -- -- -- */ \ + pslldq_i2r(10, xmm4); /* xmm4 = 72 70 -- -- -- -- -- -- */ \ + \ + pshufhw_r2r(xmm6, xmm6, 0x0B1); /* xmm6 = -- -- -- 55 50 -- -- -- */ \ + pslldq_i2r(4, xmm0); /* xmm0 = -- -- 63 -- -- -- -- -- */ \ + \ + por_r2r(xmm4, xmm6); /* xmm6 = 72 70 -- 55 50 -- -- -- */ \ + por_r2r(xmm0, xmm2); /* xmm2 = -- -- 63 -- -- 36 32 17 */ \ + \ + por_r2r(xmm6, xmm2); /* xmm2 = 72 70 63 55 50 36 32 17 */ \ + pshufhw_r2r(xmm1, xmm1, 0x0C9); /* xmm1 = 67 64 66 65 -- -- -- -- */ \ + \ + movdqu_r2r(xmm3, xmm6); /* xmm6 = 35 33 34 -- -- -- -- -- */ \ + movdqu_r2m(xmm2, *(eax+80)); /* write 72 70 63 55 50 36 32 17 */ \ + \ + psrldq_i2r(12, xmm6); /* xmm6 = -- -- -- -- -- -- 35 33 */ \ + pslldq_i2r(4, xmm3); /* xmm3 = 34 -- -- -- -- -- -- -- */ \ + \ + pshuflw_r2r(xmm5,
xmm5, 0x04E); /* xmm5 = -- -- -- 54 51 -- 53 52 */ \ + movdqu_r2r(xmm7, xmm4); /* xmm4 = 77 75 74 76 73 -- -- -- */ \ + \ + movdqu_r2r(xmm5, xmm2); /* xmm2 = -- -- -- 54 51 -- 53 52 */ \ + psrldq_i2r(10, xmm7); /* xmm7 = -- -- -- -- -- 77 75 74 */ \ + \ + pslldq_i2r(6, xmm4); /* xmm4 = 76 73 -- -- -- -- -- -- */ \ + pslldq_i2r(12, xmm2); /* xmm2 = 53 52 -- -- -- -- -- -- */ \ + \ + movdqu_r2r(xmm1, xmm0); /* xmm0 = 67 64 66 65 -- -- -- -- */ \ + psrldq_i2r(12, xmm1); /* xmm1 = -- -- -- -- -- -- 67 64 */ \ + \ + psrldq_i2r(6, xmm5); /* xmm5 = -- -- -- -- -- -- 54 51 */ \ + psrldq_i2r(14, xmm3); /* xmm3 = -- -- -- -- -- -- -- 34 */ \ + \ + pslldq_i2r(10, xmm7); /* xmm7 = 77 75 74 -- -- -- -- -- */ \ + por_r2r(xmm6, xmm4); /* xmm4 = 76 73 -- -- -- -- 35 33 */ \ + \ + psrldq_i2r(10, xmm2); /* xmm2 = -- -- -- -- -- 53 52 -- */ \ + pslldq_i2r(4, xmm0); /* xmm0 = 66 65 -- -- -- -- -- -- */ \ + \ + pslldq_i2r(8, xmm1); /* xmm1 = -- -- 67 64 -- -- -- -- */ \ + por_r2r(xmm7, xmm3); /* xmm3 = 77 75 74 -- -- -- -- 34 */ \ + \ + psrldq_i2r(6, xmm0); /* xmm0 = -- -- -- 66 65 -- -- -- */ \ + pslldq_i2r(4, xmm5); /* xmm5 = -- -- -- -- 54 51 -- -- */ \ + \ + por_r2r(xmm1, xmm4); /* xmm4 = 76 73 67 64 -- -- 35 33 */ \ + por_r2r(xmm2, xmm3); /* xmm3 = 77 75 74 -- -- 53 52 34 */ \ + \ + por_r2r(xmm5, xmm4); /* xmm4 = 76 73 67 64 54 51 35 33 */ \ + por_r2r(xmm0, xmm3); /* xmm3 = 77 75 74 66 65 53 52 34 */ \ + \ + movdqu_r2m(xmm4, *(eax+96)); /* write 76 73 67 64 54 51 35 33 */ \ + movdqu_r2m(xmm3, *(eax+112)); /* write 77 75 74 66 65 53 52 34 */ \ + \ +} /* end of SSE2_Dequantize Macro */ + + +void ff_vp3_idct_sse2(int16_t *input_data) +{ + unsigned char *input_bytes = (unsigned char *)input_data; + unsigned char *output_data_bytes = (unsigned char *)input_data; + unsigned char *idct_data_bytes = (unsigned char *)SSE2_idct_data; + unsigned char *Eight = (unsigned char *)eight_data; + +#define eax input_bytes +//#define ebx dequant_matrix_bytes +#define ecx dequant_const_bytes +#define 
edx idct_data_bytes + +#define I(i) (eax + 16 * i) +#define O(i) (ebx + 16 * i) +#define C(i) (edx + 16 * (i-1)) + + // SSE2_Dequantize(); + +#undef ebx +#define ebx output_data_bytes + + SSE2_Row_IDCT(); + + SSE2_Transpose(); + + SSE2_Column_IDCT(); +} diff --git a/mpeg4/src/libavcodec/idcinvideo.c b/mpeg4/src/libavcodec/idcinvideo.c new file mode 100644 index 0000000000000000000000000000000000000000..7e7e6aab1b90fb878a212388f8c1163866d5d307 --- /dev/null +++ b/mpeg4/src/libavcodec/idcinvideo.c @@ -0,0 +1,270 @@ +/* + * Id Quake II CIN Video Decoder + * Copyright (C) 2003 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file idcinvideo.c + * Id Quake II Cin Video Decoder by Dr. Tim Ferguson + * For more information about the Id CIN format, visit: + * http://www.csse.monash.edu.au/~timf/ + * + * This video decoder outputs PAL8 colorspace data. Interacting with this + * decoder is a little involved. During initialization, the demuxer must + * transmit the 65536-byte Huffman table(s) to the decoder via extradata. + * Then, whenever a palette change is encountered while demuxing the file, + * the demuxer must use the same extradata space to transmit an + * AVPaletteControl structure. 
/* One node of a Huffman tree: a leaf (token) or an internal node. */
typedef struct
{
  int count;              /* weight of the node */
  unsigned char used;     /* non-zero once the node has been given a parent */
  int children[2];        /* child node indices, selected by the next input bit */
} hnode_t;

/*
 * Pick the cheapest node that is still available.
 *
 * Scans the first num_hnodes entries of hnodes for the entry that is not
 * yet marked as used, has a non-zero count, and whose count is the
 * smallest (the lowest index wins on ties). The chosen entry is marked as
 * used before returning.
 *
 * Returns the index of the chosen node, or -1 if no entry qualifies.
 */
static int huff_smallest_node(hnode_t *hnodes, int num_hnodes) {
    int lowest_count = 99999999;
    int lowest_index = -1;
    int n;

    for (n = 0; n < num_hnodes; n++) {
        const hnode_t *cand = &hnodes[n];

        /* eligible: unused, non-zero weight, strictly below the current minimum */
        if (!cand->used && cand->count && cand->count < lowest_count) {
            lowest_count = cand->count;
            lowest_index = n;
        }
    }

    if (lowest_index != -1)
        hnodes[lowest_index].used = 1;

    return lowest_index;
}
+ * huff_nodes[prev][i >= HUF_TOKENS] - are used to construct the tree. + * num_huff_nodes[prev] - contains the index to the root node of the tree. + * That is: huff_nodes[prev][num_huff_nodes[prev]] is the root node. + */ +static void huff_build_tree(IdcinContext *s, int prev) { + hnode_t *node, *hnodes; + int num_hnodes, i; + + num_hnodes = HUF_TOKENS; + hnodes = s->huff_nodes[prev]; + for(i = 0; i < HUF_TOKENS * 2; i++) + hnodes[i].used = 0; + + while (1) { + node = &hnodes[num_hnodes]; /* next free node */ + + /* pick two lowest counts */ + node->children[0] = huff_smallest_node(hnodes, num_hnodes); + if(node->children[0] == -1) + break; /* reached the root node */ + + node->children[1] = huff_smallest_node(hnodes, num_hnodes); + if(node->children[1] == -1) + break; /* reached the root node */ + + /* combine nodes probability for new node */ + node->count = hnodes[node->children[0]].count + + hnodes[node->children[1]].count; + num_hnodes++; + } + + s->num_huff_nodes[prev] = num_hnodes - 1; +} + +static int idcin_decode_init(AVCodecContext *avctx) +{ + IdcinContext *s = (IdcinContext *)avctx->priv_data; + int i, j, histogram_index = 0; + unsigned char *histograms; + + s->avctx = avctx; + avctx->pix_fmt = PIX_FMT_PAL8; + avctx->has_b_frames = 0; + dsputil_init(&s->dsp, avctx); + + /* make sure the Huffman tables make it */ + if (s->avctx->extradata_size != HUFFMAN_TABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, " Id CIN video: expected extradata size of %d\n", HUFFMAN_TABLE_SIZE); + return -1; + } + + /* build the 256 Huffman decode trees */ + histograms = (unsigned char *)s->avctx->extradata; + for (i = 0; i < 256; i++) { + for(j = 0; j < HUF_TOKENS; j++) + s->huff_nodes[i][j].count = histograms[histogram_index++]; + huff_build_tree(s, i); + } + + s->frame.data[0] = NULL; + + return 0; +} + +static void idcin_decode_vlcs(IdcinContext *s) +{ + hnode_t *hnodes; + long x, y; + int prev; + unsigned char v = 0; + int bit_pos, node_num, dat_pos; + + prev = bit_pos = 
dat_pos = 0; + for (y = 0; y < (s->frame.linesize[0] * s->avctx->height); + y += s->frame.linesize[0]) { + for (x = y; x < y + s->avctx->width; x++) { + node_num = s->num_huff_nodes[prev]; + hnodes = s->huff_nodes[prev]; + + while(node_num >= HUF_TOKENS) { + if(!bit_pos) { + if(dat_pos >= s->size) { + av_log(s->avctx, AV_LOG_ERROR, "Huffman decode error.\n"); + return; + } + bit_pos = 8; + v = s->buf[dat_pos++]; + } + + node_num = hnodes[node_num].children[v & 0x01]; + v = v >> 1; + bit_pos--; + } + + s->frame.data[0][x] = node_num; + prev = node_num; + } + } +} + +static int idcin_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + IdcinContext *s = (IdcinContext *)avctx->priv_data; + AVPaletteControl *palette_control = avctx->palctrl; + + s->buf = buf; + s->size = buf_size; + + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + if (avctx->get_buffer(avctx, &s->frame)) { + av_log(avctx, AV_LOG_ERROR, " Id CIN Video: get_buffer() failed\n"); + return -1; + } + + idcin_decode_vlcs(s); + + /* make the palette available on the way out */ + memcpy(s->frame.data[1], palette_control->palette, PALETTE_COUNT * 4); + /* If palette changed inform application*/ + if (palette_control->palette_changed) { + palette_control->palette_changed = 0; + s->frame.palette_has_changed = 1; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + /* report that the buffer was completely consumed */ + return buf_size; +} + +static int idcin_decode_end(AVCodecContext *avctx) +{ + IdcinContext *s = (IdcinContext *)avctx->priv_data; + + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + +AVCodec idcin_decoder = { + "idcinvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_IDCIN, + sizeof(IdcinContext), + idcin_decode_init, + NULL, + idcin_decode_end, + idcin_decode_frame, + CODEC_CAP_DR1, +}; + diff --git a/mpeg4/src/libavcodec/imgconvert.c b/mpeg4/src/libavcodec/imgconvert.c new file mode 
100644 index 0000000000000000000000000000000000000000..cf0c297ef2c203a15f6b722d52975bfc1d20c803 --- /dev/null +++ b/mpeg4/src/libavcodec/imgconvert.c @@ -0,0 +1,2592 @@ +/* + * Misc image convertion routines + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file imgconvert.c + * Misc image convertion routines. + */ + +/* TODO: + * - write 'ffimg' program to test all the image related stuff + * - move all api to slice based system + * - integrate deinterlacing, postprocessing and scaling in the conversion process + */ + +#include "avcodec.h" +#include "dsputil.h" + +#ifdef USE_FASTMEMCPY +#include "fastmemcpy.h" +#endif + +#ifdef HAVE_MMX +#include "i386/mmx.h" +#endif + +#define xglue(x, y) x ## y +#define glue(x, y) xglue(x, y) + +#define FF_COLOR_RGB 0 /* RGB color space */ +#define FF_COLOR_GRAY 1 /* gray color space */ +#define FF_COLOR_YUV 2 /* YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */ +#define FF_COLOR_YUV_JPEG 3 /* YUV color space. 
0 <= Y <= 255, 0 <= U, V <= 255 */ + +#define FF_PIXEL_PLANAR 0 /* each channel has one component in AVPicture */ +#define FF_PIXEL_PACKED 1 /* only one components containing all the channels */ +#define FF_PIXEL_PALETTE 2 /* one components containing indexes for a palette */ + +typedef struct PixFmtInfo { + const char *name; + uint8_t nb_channels; /* number of channels (including alpha) */ + uint8_t color_type; /* color type (see FF_COLOR_xxx constants) */ + uint8_t pixel_type; /* pixel storage type (see FF_PIXEL_xxx constants) */ + uint8_t is_alpha : 1; /* true if alpha can be specified */ + uint8_t x_chroma_shift; /* X chroma subsampling factor is 2 ^ shift */ + uint8_t y_chroma_shift; /* Y chroma subsampling factor is 2 ^ shift */ + uint8_t depth; /* bit depth of the color components */ +} PixFmtInfo; + +/* this table gives more information about formats */ +static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { + /* YUV formats */ + [PIX_FMT_YUV420P] = { + .name = "yuv420p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 1, + }, + [PIX_FMT_YUV422P] = { + .name = "yuv422p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV444P] = { + .name = "yuv444p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV422] = { + .name = "yuv422", + .nb_channels = 1, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_UYVY422] = { + .name = "uyvy422", + .nb_channels = 1, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_YUV410P] = { + .name = "yuv410p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV, + 
.pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 2, .y_chroma_shift = 2, + }, + [PIX_FMT_YUV411P] = { + .name = "yuv411p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 2, .y_chroma_shift = 0, + }, + + /* JPEG YUV */ + [PIX_FMT_YUVJ420P] = { + .name = "yuvj420p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV_JPEG, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 1, + }, + [PIX_FMT_YUVJ422P] = { + .name = "yuvj422p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV_JPEG, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, + [PIX_FMT_YUVJ444P] = { + .name = "yuvj444p", + .nb_channels = 3, + .color_type = FF_COLOR_YUV_JPEG, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + + /* RGB formats */ + [PIX_FMT_RGB24] = { + .name = "rgb24", + .nb_channels = 3, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_BGR24] = { + .name = "bgr24", + .nb_channels = 3, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_RGBA32] = { + .name = "rgba32", + .nb_channels = 4, .is_alpha = 1, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_RGB565] = { + .name = "rgb565", + .nb_channels = 3, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PACKED, + .depth = 5, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + [PIX_FMT_RGB555] = { + .name = "rgb555", + .nb_channels = 4, .is_alpha = 1, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PACKED, + .depth = 5, + .x_chroma_shift = 0, .y_chroma_shift = 0, + }, + + /* gray / mono formats */ + [PIX_FMT_GRAY8] = { + .name = "gray", + .nb_channels = 1, + .color_type = 
FF_COLOR_GRAY, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 8, + }, + [PIX_FMT_MONOWHITE] = { + .name = "monow", + .nb_channels = 1, + .color_type = FF_COLOR_GRAY, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 1, + }, + [PIX_FMT_MONOBLACK] = { + .name = "monob", + .nb_channels = 1, + .color_type = FF_COLOR_GRAY, + .pixel_type = FF_PIXEL_PLANAR, + .depth = 1, + }, + + /* paletted formats */ + [PIX_FMT_PAL8] = { + .name = "pal8", + .nb_channels = 4, .is_alpha = 1, + .color_type = FF_COLOR_RGB, + .pixel_type = FF_PIXEL_PALETTE, + .depth = 8, + }, + [PIX_FMT_XVMC_MPEG2_MC] = { + .name = "xvmcmc", + }, + [PIX_FMT_XVMC_MPEG2_IDCT] = { + .name = "xvmcidct", + }, + [PIX_FMT_UYVY411] = { + .name = "uyvy411", + .nb_channels = 1, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 2, .y_chroma_shift = 0, + }, +}; + +void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift) +{ + *h_shift = pix_fmt_info[pix_fmt].x_chroma_shift; + *v_shift = pix_fmt_info[pix_fmt].y_chroma_shift; +} + +const char *avcodec_get_pix_fmt_name(int pix_fmt) +{ + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB) + return "???"; + else + return pix_fmt_info[pix_fmt].name; +} + +enum PixelFormat avcodec_get_pix_fmt(const char* name) +{ + int i; + + for (i=0; i < PIX_FMT_NB; i++) + if (!strcmp(pix_fmt_info[i].name, name)) + break; + return i; +} + +/* Picture field are filled with 'ptr' addresses. 
Also return size */ +int avpicture_fill(AVPicture *picture, uint8_t *ptr, + int pix_fmt, int width, int height) +{ + int size, w2, h2, size2; + PixFmtInfo *pinfo; + + if(avcodec_check_dimensions(NULL, width, height)) + goto fail; + + pinfo = &pix_fmt_info[pix_fmt]; + size = width * height; + switch(pix_fmt) { + case PIX_FMT_YUV420P: + case PIX_FMT_YUV422P: + case PIX_FMT_YUV444P: + case PIX_FMT_YUV410P: + case PIX_FMT_YUV411P: + case PIX_FMT_YUVJ420P: + case PIX_FMT_YUVJ422P: + case PIX_FMT_YUVJ444P: + w2 = (width + (1 << pinfo->x_chroma_shift) - 1) >> pinfo->x_chroma_shift; + h2 = (height + (1 << pinfo->y_chroma_shift) - 1) >> pinfo->y_chroma_shift; + size2 = w2 * h2; + picture->data[0] = ptr; + picture->data[1] = picture->data[0] + size; + picture->data[2] = picture->data[1] + size2; + picture->linesize[0] = width; + picture->linesize[1] = w2; + picture->linesize[2] = w2; + return size + 2 * size2; + case PIX_FMT_RGB24: + case PIX_FMT_BGR24: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 3; + return size * 3; + case PIX_FMT_RGBA32: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 4; + return size * 4; + case PIX_FMT_RGB555: + case PIX_FMT_RGB565: + case PIX_FMT_YUV422: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 2; + return size * 2; + case PIX_FMT_UYVY422: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 2; + return size * 2; + case PIX_FMT_UYVY411: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width + width/2; + return size + size/2; + case PIX_FMT_GRAY8: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width; + return size; + case PIX_FMT_MONOWHITE: + case PIX_FMT_MONOBLACK: + 
picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = (width + 7) >> 3; + return picture->linesize[0] * height; + case PIX_FMT_PAL8: + size2 = (size + 3) & ~3; + picture->data[0] = ptr; + picture->data[1] = ptr + size2; /* palette is stored here as 256 32 bit words */ + picture->data[2] = NULL; + picture->linesize[0] = width; + picture->linesize[1] = 4; + return size2 + 256 * 4; + default: +fail: + picture->data[0] = NULL; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->data[3] = NULL; + return -1; + } +} + +int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, + unsigned char *dest, int dest_size) +{ + PixFmtInfo* pf = &pix_fmt_info[pix_fmt]; + int i, j, w, h, data_planes; + const unsigned char* s; + int size = avpicture_get_size(pix_fmt, width, height); + + if (size > dest_size || size < 0) + return -1; + + if (pf->pixel_type == FF_PIXEL_PACKED || pf->pixel_type == FF_PIXEL_PALETTE) { + if (pix_fmt == PIX_FMT_YUV422 || + pix_fmt == PIX_FMT_UYVY422 || + pix_fmt == PIX_FMT_RGB565 || + pix_fmt == PIX_FMT_RGB555) + w = width * 2; + else if (pix_fmt == PIX_FMT_UYVY411) + w = width + width/2; + else if (pix_fmt == PIX_FMT_PAL8) + w = width; + else + w = width * (pf->depth * pf->nb_channels / 8); + + data_planes = 1; + h = height; + } else { + data_planes = pf->nb_channels; + w = (width*pf->depth + 7)/8; + h = height; + } + + for (i=0; i> pf->x_chroma_shift; + h = height >> pf->y_chroma_shift; + } + s = src->data[i]; + for(j=0; jlinesize[i]; + } + } + + if (pf->pixel_type == FF_PIXEL_PALETTE) + memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4); + + return size; +} + +int avpicture_get_size(int pix_fmt, int width, int height) +{ + AVPicture dummy_pict; + return avpicture_fill(&dummy_pict, NULL, pix_fmt, width, height); +} + +/** + * compute the loss when converting from a pixel format to another + */ +int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int 
src_pix_fmt, + int has_alpha) +{ + const PixFmtInfo *pf, *ps; + int loss; + + ps = &pix_fmt_info[src_pix_fmt]; + pf = &pix_fmt_info[dst_pix_fmt]; + + /* compute loss */ + loss = 0; + pf = &pix_fmt_info[dst_pix_fmt]; + if (pf->depth < ps->depth || + (dst_pix_fmt == PIX_FMT_RGB555 && src_pix_fmt == PIX_FMT_RGB565)) + loss |= FF_LOSS_DEPTH; + if (pf->x_chroma_shift > ps->x_chroma_shift || + pf->y_chroma_shift > ps->y_chroma_shift) + loss |= FF_LOSS_RESOLUTION; + switch(pf->color_type) { + case FF_COLOR_RGB: + if (ps->color_type != FF_COLOR_RGB && + ps->color_type != FF_COLOR_GRAY) + loss |= FF_LOSS_COLORSPACE; + break; + case FF_COLOR_GRAY: + if (ps->color_type != FF_COLOR_GRAY) + loss |= FF_LOSS_COLORSPACE; + break; + case FF_COLOR_YUV: + if (ps->color_type != FF_COLOR_YUV) + loss |= FF_LOSS_COLORSPACE; + break; + case FF_COLOR_YUV_JPEG: + if (ps->color_type != FF_COLOR_YUV_JPEG && + ps->color_type != FF_COLOR_YUV && + ps->color_type != FF_COLOR_GRAY) + loss |= FF_LOSS_COLORSPACE; + break; + default: + /* fail safe test */ + if (ps->color_type != pf->color_type) + loss |= FF_LOSS_COLORSPACE; + break; + } + if (pf->color_type == FF_COLOR_GRAY && + ps->color_type != FF_COLOR_GRAY) + loss |= FF_LOSS_CHROMA; + if (!pf->is_alpha && (ps->is_alpha && has_alpha)) + loss |= FF_LOSS_ALPHA; + if (pf->pixel_type == FF_PIXEL_PALETTE && + (ps->pixel_type != FF_PIXEL_PALETTE && ps->color_type != FF_COLOR_GRAY)) + loss |= FF_LOSS_COLORQUANT; + return loss; +} + +static int avg_bits_per_pixel(int pix_fmt) +{ + int bits; + const PixFmtInfo *pf; + + pf = &pix_fmt_info[pix_fmt]; + switch(pf->pixel_type) { + case FF_PIXEL_PACKED: + switch(pix_fmt) { + case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: + case PIX_FMT_RGB565: + case PIX_FMT_RGB555: + bits = 16; + break; + case PIX_FMT_UYVY411: + bits = 12; + break; + default: + bits = pf->depth * pf->nb_channels; + break; + } + break; + case FF_PIXEL_PLANAR: + if (pf->x_chroma_shift == 0 && pf->y_chroma_shift == 0) { + bits = pf->depth * 
/*
 * Copy a rectangle of 'height' rows of 'width' bytes from src to dst.
 *
 * dst_wrap and src_wrap are the byte distances between the starts of
 * consecutive rows in the destination and source. Nothing is copied when
 * either pointer is NULL or when height is not positive.
 */
void ff_img_copy_plane(uint8_t *dst, int dst_wrap,
                       const uint8_t *src, int src_wrap,
                       int width, int height)
{
    int rows_left;

    if (dst == NULL || src == NULL)
        return;

    for (rows_left = height; rows_left > 0; rows_left--) {
        memcpy(dst, src, width);
        src += src_wrap;
        dst += dst_wrap;
    }
}
+ */ +void img_copy(AVPicture *dst, const AVPicture *src, + int pix_fmt, int width, int height) +{ + int bwidth, bits, i; + PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; + + pf = &pix_fmt_info[pix_fmt]; + switch(pf->pixel_type) { + case FF_PIXEL_PACKED: + switch(pix_fmt) { + case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: + case PIX_FMT_RGB565: + case PIX_FMT_RGB555: + bits = 16; + break; + case PIX_FMT_UYVY411: + bits = 12; + break; + default: + bits = pf->depth * pf->nb_channels; + break; + } + bwidth = (width * bits + 7) >> 3; + ff_img_copy_plane(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + bwidth, height); + break; + case FF_PIXEL_PLANAR: + for(i = 0; i < pf->nb_channels; i++) { + int w, h; + w = width; + h = height; + if (i == 1 || i == 2) { + w >>= pf->x_chroma_shift; + h >>= pf->y_chroma_shift; + } + bwidth = (w * pf->depth + 7) >> 3; + ff_img_copy_plane(dst->data[i], dst->linesize[i], + src->data[i], src->linesize[i], + bwidth, h); + } + break; + case FF_PIXEL_PALETTE: + ff_img_copy_plane(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + width, height); + /* copy the palette */ + ff_img_copy_plane(dst->data[1], dst->linesize[1], + src->data[1], src->linesize[1], + 4, 256); + break; + } +} + +/* XXX: totally non optimized */ + +static void yuv422_to_yuv420p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + + for(;height >= 1; height -= 2) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[0]; + cb[0] = p[1]; + lum[1] = p[2]; + cr[0] = p[3]; + p += 4; + lum += 2; + cb++; + cr++; + } + if (w) { + lum[0] = p[0]; + cb[0] = p[1]; + cr[0] = p[3]; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + if (height>1) { + p = p1; + lum = lum1; + for(w = width; w >= 2; w -= 2) { + 
lum[0] = p[0]; + lum[1] = p[2]; + p += 4; + lum += 2; + } + if (w) { + lum[0] = p[0]; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + } + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + +static void uyvy422_to_yuv420p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + + for(;height >= 1; height -= 2) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + cb[0] = p[0]; + lum[1] = p[3]; + cr[0] = p[2]; + p += 4; + lum += 2; + cb++; + cr++; + } + if (w) { + lum[0] = p[1]; + cb[0] = p[0]; + cr[0] = p[2]; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + if (height>1) { + p = p1; + lum = lum1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + lum[1] = p[3]; + p += 4; + lum += 2; + } + if (w) { + lum[0] = p[1]; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + } + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + +static void uyvy422_to_yuv422p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + cb[0] = p[0]; + lum[1] = p[3]; + cr[0] = p[2]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + +static void yuv422_to_yuv422p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = 
dst->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[0]; + cb[0] = p[1]; + lum[1] = p[2]; + cr[0] = p[3]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + +static void yuv422p_to_yuv422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + uint8_t *p, *p1; + const uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = dst->data[0]; + lum1 = src->data[0]; + cb1 = src->data[1]; + cr1 = src->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + p[0] = lum[0]; + p[1] = cb[0]; + p[2] = lum[1]; + p[3] = cr[0]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += dst->linesize[0]; + lum1 += src->linesize[0]; + cb1 += src->linesize[1]; + cr1 += src->linesize[2]; + } +} + +static void yuv422p_to_uyvy422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + uint8_t *p, *p1; + const uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = dst->data[0]; + lum1 = src->data[0]; + cb1 = src->data[1]; + cr1 = src->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + p[1] = lum[0]; + p[0] = cb[0]; + p[3] = lum[1]; + p[2] = cr[0]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += dst->linesize[0]; + lum1 += src->linesize[0]; + cb1 += src->linesize[1]; + cr1 += src->linesize[2]; + } +} + +static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 4; w -= 4) { + cb[0] = p[0]; + lum[0] = p[1]; + lum[1] = p[2]; + 
cr[0] = p[3]; + lum[2] = p[4]; + lum[3] = p[5]; + p += 6; + lum += 4; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + +static void yuv420p_to_yuv422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int w, h; + uint8_t *line1, *line2, *linesrc = dst->data[0]; + uint8_t *lum1, *lum2, *lumsrc = src->data[0]; + uint8_t *cb1, *cb2 = src->data[1]; + uint8_t *cr1, *cr2 = src->data[2]; + + for(h = height / 2; h--;) { + line1 = linesrc; + line2 = linesrc + dst->linesize[0]; + + lum1 = lumsrc; + lum2 = lumsrc + src->linesize[0]; + + cb1 = cb2; + cr1 = cr2; + + for(w = width / 2; w--;) { + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cb1++; + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cr1++; + } + + linesrc += dst->linesize[0] * 2; + lumsrc += src->linesize[0] * 2; + cb2 += src->linesize[1]; + cr2 += src->linesize[2]; + } +} + +static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int w, h; + uint8_t *line1, *line2, *linesrc = dst->data[0]; + uint8_t *lum1, *lum2, *lumsrc = src->data[0]; + uint8_t *cb1, *cb2 = src->data[1]; + uint8_t *cr1, *cr2 = src->data[2]; + + for(h = height / 2; h--;) { + line1 = linesrc; + line2 = linesrc + dst->linesize[0]; + + lum1 = lumsrc; + lum2 = lumsrc + src->linesize[0]; + + cb1 = cb2; + cr1 = cr2; + + for(w = width / 2; w--;) { + *line1++ = *line2++ = *cb1++; + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cr1++; + *line1++ = *lum1++; *line2++ = *lum2++; + } + + linesrc += dst->linesize[0] * 2; + lumsrc += src->linesize[0] * 2; + cb2 += src->linesize[1]; + cr2 += src->linesize[2]; + } +} + +#define SCALEBITS 10 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1<> SCALEBITS];\ + g = cm[(y + g_add) >> SCALEBITS];\ + b = cm[(y + b_add) >> SCALEBITS];\ +} + +#define YUV_TO_RGB1(cb1, cr1)\ +{\ + cb = (cb1) - 128;\ 
/*
 * Fixed-point RGB -> luma/chroma helpers built on FIX(), ONE_HALF and
 * SCALEBITS.
 *
 * The _U/_V variants take an extra 'shift': the rounding term is scaled by
 * 2^shift and the result is shifted right by SCALEBITS + shift, so the
 * color arguments are presumably sums of 2^shift samples (verify against
 * the callers). The _CCIR variants scale luma by 219/255 with an offset of
 * 16 and chroma by 224/255.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#define RGB_TO_Y(r, g, b) \
((FIX(0.29900) * (r) + FIX(0.58700) * (g) + \
  FIX(0.11400) * (b) + ONE_HALF) >> SCALEBITS)

#define RGB_TO_U(r1, g1, b1, shift)\
(((- FIX(0.16874) * (r1) - FIX(0.33126) * (g1) + \
     FIX(0.50000) * (b1) + (ONE_HALF << (shift)) - 1) >> (SCALEBITS + (shift))) + 128)

#define RGB_TO_V(r1, g1, b1, shift)\
(((FIX(0.50000) * (r1) - FIX(0.41869) * (g1) - \
   FIX(0.08131) * (b1) + (ONE_HALF << (shift)) - 1) >> (SCALEBITS + (shift))) + 128)

#define RGB_TO_Y_CCIR(r, g, b) \
((FIX(0.29900*219.0/255.0) * (r) + FIX(0.58700*219.0/255.0) * (g) + \
  FIX(0.11400*219.0/255.0) * (b) + (ONE_HALF + (16 << SCALEBITS))) >> SCALEBITS)

#define RGB_TO_U_CCIR(r1, g1, b1, shift)\
(((- FIX(0.16874*224.0/255.0) * (r1) - FIX(0.33126*224.0/255.0) * (g1) + \
     FIX(0.50000*224.0/255.0) * (b1) + (ONE_HALF << (shift)) - 1) >> (SCALEBITS + (shift))) + 128)

#define RGB_TO_V_CCIR(r1, g1, b1, shift)\
(((FIX(0.50000*224.0/255.0) * (r1) - FIX(0.41869*224.0/255.0) * (g1) - \
   FIX(0.08131*224.0/255.0) * (b1) + (ONE_HALF << (shift)) - 1) >> (SCALEBITS + (shift))) + 128)
+static uint8_t y_jpeg_to_ccir[256]; +static uint8_t c_ccir_to_jpeg[256]; +static uint8_t c_jpeg_to_ccir[256]; + +/* init various conversion tables */ +static void img_convert_init(void) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + for(i = 0;i < 256; i++) { + y_ccir_to_jpeg[i] = Y_CCIR_TO_JPEG(i); + y_jpeg_to_ccir[i] = Y_JPEG_TO_CCIR(i); + c_ccir_to_jpeg[i] = C_CCIR_TO_JPEG(i); + c_jpeg_to_ccir[i] = C_JPEG_TO_CCIR(i); + } +} + +/* apply to each pixel the given table */ +static void img_apply_table(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height, const uint8_t *table1) +{ + int n; + const uint8_t *s; + uint8_t *d; + const uint8_t *table; + + table = table1; + for(;height > 0; height--) { + s = src; + d = dst; + n = width; + while (n >= 4) { + d[0] = table[s[0]]; + d[1] = table[s[1]]; + d[2] = table[s[2]]; + d[3] = table[s[3]]; + d += 4; + s += 4; + n -= 4; + } + while (n > 0) { + d[0] = table[s[0]]; + d++; + s++; + n--; + } + dst += dst_wrap; + src += src_wrap; + } +} + +/* XXX: use generic filter ? 
*/ +/* XXX: in most cases, the sampling position is incorrect */ + +/* 4x1 -> 1x1 */ +static void shrink41(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w; + const uint8_t *s; + uint8_t *d; + + for(;height > 0; height--) { + s = src; + d = dst; + for(w = width;w > 0; w--) { + d[0] = (s[0] + s[1] + s[2] + s[3] + 2) >> 2; + s += 4; + d++; + } + src += src_wrap; + dst += dst_wrap; + } +} + +/* 2x1 -> 1x1 */ +static void shrink21(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w; + const uint8_t *s; + uint8_t *d; + + for(;height > 0; height--) { + s = src; + d = dst; + for(w = width;w > 0; w--) { + d[0] = (s[0] + s[1]) >> 1; + s += 2; + d++; + } + src += src_wrap; + dst += dst_wrap; + } +} + +/* 1x2 -> 1x1 */ +static void shrink12(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w; + uint8_t *d; + const uint8_t *s1, *s2; + + for(;height > 0; height--) { + s1 = src; + s2 = s1 + src_wrap; + d = dst; + for(w = width;w >= 4; w-=4) { + d[0] = (s1[0] + s2[0]) >> 1; + d[1] = (s1[1] + s2[1]) >> 1; + d[2] = (s1[2] + s2[2]) >> 1; + d[3] = (s1[3] + s2[3]) >> 1; + s1 += 4; + s2 += 4; + d += 4; + } + for(;w > 0; w--) { + d[0] = (s1[0] + s2[0]) >> 1; + s1++; + s2++; + d++; + } + src += 2 * src_wrap; + dst += dst_wrap; + } +} + +/* 2x2 -> 1x1 */ +void ff_shrink22(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w; + const uint8_t *s1, *s2; + uint8_t *d; + + for(;height > 0; height--) { + s1 = src; + s2 = s1 + src_wrap; + d = dst; + for(w = width;w >= 4; w-=4) { + d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2; + d[1] = (s1[2] + s1[3] + s2[2] + s2[3] + 2) >> 2; + d[2] = (s1[4] + s1[5] + s2[4] + s2[5] + 2) >> 2; + d[3] = (s1[6] + s1[7] + s2[6] + s2[7] + 2) >> 2; + s1 += 8; + s2 += 8; + d += 4; + } + for(;w > 0; w--) { + d[0] = (s1[0] + s1[1] + s2[0] + s2[1] + 2) >> 2; + s1 += 2; + s2 += 
2; + d++; + } + src += 2 * src_wrap; + dst += dst_wrap; + } +} + +/* 4x4 -> 1x1 */ +void ff_shrink44(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w; + const uint8_t *s1, *s2, *s3, *s4; + uint8_t *d; + + for(;height > 0; height--) { + s1 = src; + s2 = s1 + src_wrap; + s3 = s2 + src_wrap; + s4 = s3 + src_wrap; + d = dst; + for(w = width;w > 0; w--) { + d[0] = (s1[0] + s1[1] + s1[2] + s1[3] + + s2[0] + s2[1] + s2[2] + s2[3] + + s3[0] + s3[1] + s3[2] + s3[3] + + s4[0] + s4[1] + s4[2] + s4[3] + 8) >> 4; + s1 += 4; + s2 += 4; + s3 += 4; + s4 += 4; + d++; + } + src += 4 * src_wrap; + dst += dst_wrap; + } +} + +/* 8x8 -> 1x1 */ +void ff_shrink88(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w, i; + + for(;height > 0; height--) { + for(w = width;w > 0; w--) { + int tmp=0; + for(i=0; i<8; i++){ + tmp += src[0] + src[1] + src[2] + src[3] + src[4] + src[5] + src[6] + src[7]; + src += src_wrap; + } + *(dst++) = (tmp + 32)>>6; + src += 8 - 8*src_wrap; + } + src += 8*src_wrap - 8*width; + dst += dst_wrap - width; + } +} + +static void grow21_line(uint8_t *dst, const uint8_t *src, + int width) +{ + int w; + const uint8_t *s1; + uint8_t *d; + + s1 = src; + d = dst; + for(w = width;w >= 4; w-=4) { + d[1] = d[0] = s1[0]; + d[3] = d[2] = s1[1]; + s1 += 2; + d += 4; + } + for(;w >= 2; w -= 2) { + d[1] = d[0] = s1[0]; + s1 ++; + d += 2; + } + /* only needed if width is not a multiple of two */ + /* XXX: veryfy that */ + if (w) { + d[0] = s1[0]; + } +} + +static void grow41_line(uint8_t *dst, const uint8_t *src, + int width) +{ + int w, v; + const uint8_t *s1; + uint8_t *d; + + s1 = src; + d = dst; + for(w = width;w >= 4; w-=4) { + v = s1[0]; + d[0] = v; + d[1] = v; + d[2] = v; + d[3] = v; + s1 ++; + d += 4; + } +} + +/* 1x1 -> 2x1 */ +static void grow21(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + for(;height > 0; height--) { + 
grow21_line(dst, src, width); + src += src_wrap; + dst += dst_wrap; + } +} + +/* 1x1 -> 2x2 */ +static void grow22(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + for(;height > 0; height--) { + grow21_line(dst, src, width); + if (height%2) + src += src_wrap; + dst += dst_wrap; + } +} + +/* 1x1 -> 4x1 */ +static void grow41(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + for(;height > 0; height--) { + grow41_line(dst, src, width); + src += src_wrap; + dst += dst_wrap; + } +} + +/* 1x1 -> 4x4 */ +static void grow44(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + for(;height > 0; height--) { + grow41_line(dst, src, width); + if ((height & 3) == 1) + src += src_wrap; + dst += dst_wrap; + } +} + +/* 1x2 -> 2x1 */ +static void conv411(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w, c; + const uint8_t *s1, *s2; + uint8_t *d; + + width>>=1; + + for(;height > 0; height--) { + s1 = src; + s2 = src + src_wrap; + d = dst; + for(w = width;w > 0; w--) { + c = (s1[0] + s2[0]) >> 1; + d[0] = c; + d[1] = c; + s1++; + s2++; + d += 2; + } + src += src_wrap * 2; + dst += dst_wrap; + } +} + +/* XXX: add jpeg quantize code */ + +#define TRANSP_INDEX (6*6*6) + +/* this is maybe slow, but allows for extensions */ +static inline unsigned char gif_clut_index(uint8_t r, uint8_t g, uint8_t b) +{ + return ((((r)/47)%6)*6*6+(((g)/47)%6)*6+(((b)/47)%6)); +} + +static void build_rgb_palette(uint8_t *palette, int has_alpha) +{ + uint32_t *pal; + static const uint8_t pal_value[6] = { 0x00, 0x33, 0x66, 0x99, 0xcc, 0xff }; + int i, r, g, b; + + pal = (uint32_t *)palette; + i = 0; + for(r = 0; r < 6; r++) { + for(g = 0; g < 6; g++) { + for(b = 0; b < 6; b++) { + pal[i++] = (0xff << 24) | (pal_value[r] << 16) | + (pal_value[g] << 8) | pal_value[b]; + } + } + } + if (has_alpha) + pal[i++] = 0; + while (i < 256) + 
pal[i++] = 0xff000000; +} + +/* copy bit n to bits 0 ... n - 1 */ +static inline unsigned int bitcopy_n(unsigned int a, int n) +{ + int mask; + mask = (1 << n) - 1; + return (a & (0xff & ~mask)) | ((-((a >> n) & 1)) & mask); +} + +/* rgb555 handling */ + +#define RGB_NAME rgb555 + +#define RGB_IN(r, g, b, s)\ +{\ + unsigned int v = ((const uint16_t *)(s))[0];\ + r = bitcopy_n(v >> (10 - 3), 3);\ + g = bitcopy_n(v >> (5 - 3), 3);\ + b = bitcopy_n(v << 3, 3);\ +} + +#define RGBA_IN(r, g, b, a, s)\ +{\ + unsigned int v = ((const uint16_t *)(s))[0];\ + r = bitcopy_n(v >> (10 - 3), 3);\ + g = bitcopy_n(v >> (5 - 3), 3);\ + b = bitcopy_n(v << 3, 3);\ + a = (-(v >> 15)) & 0xff;\ +} + +#define RGBA_OUT(d, r, g, b, a)\ +{\ + ((uint16_t *)(d))[0] = ((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3) | \ + ((a << 8) & 0x8000);\ +} + +#define BPP 2 + +#include "imgconvert_template.h" + +/* rgb565 handling */ + +#define RGB_NAME rgb565 + +#define RGB_IN(r, g, b, s)\ +{\ + unsigned int v = ((const uint16_t *)(s))[0];\ + r = bitcopy_n(v >> (11 - 3), 3);\ + g = bitcopy_n(v >> (5 - 2), 2);\ + b = bitcopy_n(v << 3, 3);\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + ((uint16_t *)(d))[0] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);\ +} + +#define BPP 2 + +#include "imgconvert_template.h" + +/* bgr24 handling */ + +#define RGB_NAME bgr24 + +#define RGB_IN(r, g, b, s)\ +{\ + b = (s)[0];\ + g = (s)[1];\ + r = (s)[2];\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + (d)[0] = b;\ + (d)[1] = g;\ + (d)[2] = r;\ +} + +#define BPP 3 + +#include "imgconvert_template.h" + +#undef RGB_IN +#undef RGB_OUT +#undef BPP + +/* rgb24 handling */ + +#define RGB_NAME rgb24 +#define FMT_RGB24 + +#define RGB_IN(r, g, b, s)\ +{\ + r = (s)[0];\ + g = (s)[1];\ + b = (s)[2];\ +} + +#define RGB_OUT(d, r, g, b)\ +{\ + (d)[0] = r;\ + (d)[1] = g;\ + (d)[2] = b;\ +} + +#define BPP 3 + +#include "imgconvert_template.h" + +/* rgba32 handling */ + +#define RGB_NAME rgba32 +#define FMT_RGBA32 + +#define RGB_IN(r, g, b, s)\ +{\ + 
unsigned int v = ((const uint32_t *)(s))[0];\ + r = (v >> 16) & 0xff;\ + g = (v >> 8) & 0xff;\ + b = v & 0xff;\ +} + +#define RGBA_IN(r, g, b, a, s)\ +{\ + unsigned int v = ((const uint32_t *)(s))[0];\ + a = (v >> 24) & 0xff;\ + r = (v >> 16) & 0xff;\ + g = (v >> 8) & 0xff;\ + b = v & 0xff;\ +} + +#define RGBA_OUT(d, r, g, b, a)\ +{\ + ((uint32_t *)(d))[0] = (a << 24) | (r << 16) | (g << 8) | b;\ +} + +#define BPP 4 + +#include "imgconvert_template.h" + +static void mono_to_gray(AVPicture *dst, const AVPicture *src, + int width, int height, int xor_mask) +{ + const unsigned char *p; + unsigned char *q; + int v, dst_wrap, src_wrap; + int y, w; + + p = src->data[0]; + src_wrap = src->linesize[0] - ((width + 7) >> 3); + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - width; + for(y=0;y= 8) { + v = *p++ ^ xor_mask; + q[0] = -(v >> 7); + q[1] = -((v >> 6) & 1); + q[2] = -((v >> 5) & 1); + q[3] = -((v >> 4) & 1); + q[4] = -((v >> 3) & 1); + q[5] = -((v >> 2) & 1); + q[6] = -((v >> 1) & 1); + q[7] = -((v >> 0) & 1); + w -= 8; + q += 8; + } + if (w > 0) { + v = *p++ ^ xor_mask; + do { + q[0] = -((v >> 7) & 1); + q++; + v <<= 1; + } while (--w); + } + p += src_wrap; + q += dst_wrap; + } +} + +static void monowhite_to_gray(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + mono_to_gray(dst, src, width, height, 0xff); +} + +static void monoblack_to_gray(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + mono_to_gray(dst, src, width, height, 0x00); +} + +static void gray_to_mono(AVPicture *dst, const AVPicture *src, + int width, int height, int xor_mask) +{ + int n; + const uint8_t *s; + uint8_t *d; + int j, b, v, n1, src_wrap, dst_wrap, y; + + s = src->data[0]; + src_wrap = src->linesize[0] - width; + + d = dst->data[0]; + dst_wrap = dst->linesize[0] - ((width + 7) >> 3); + + for(y=0;y= 8) { + v = 0; + for(j=0;j<8;j++) { + b = s[0]; + s++; + v = (v << 1) | (b >> 7); + } + d[0] = v ^ xor_mask; + d++; + n -= 8; + } + if (n > 0) { + n1 = n; 
+ v = 0; + while (n > 0) { + b = s[0]; + s++; + v = (v << 1) | (b >> 7); + n--; + } + d[0] = (v << (8 - (n1 & 7))) ^ xor_mask; + d++; + } + s += src_wrap; + d += dst_wrap; + } +} + +static void gray_to_monowhite(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + gray_to_mono(dst, src, width, height, 0xff); +} + +static void gray_to_monoblack(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + gray_to_mono(dst, src, width, height, 0x00); +} + +typedef struct ConvertEntry { + void (*convert)(AVPicture *dst, + const AVPicture *src, int width, int height); +} ConvertEntry; + +/* Add each new convertion function in this table. In order to be able + to convert from any format to any format, the following constraints + must be satisfied: + + - all FF_COLOR_RGB formats must convert to and from PIX_FMT_RGB24 + + - all FF_COLOR_GRAY formats must convert to and from PIX_FMT_GRAY8 + + - all FF_COLOR_RGB formats with alpha must convert to and from PIX_FMT_RGBA32 + + - PIX_FMT_YUV444P and PIX_FMT_YUVJ444P must convert to and from + PIX_FMT_RGB24. + + - PIX_FMT_422 must convert to and from PIX_FMT_422P. + + The other conversion functions are just optimisations for common cases. 
+*/ +static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { + [PIX_FMT_YUV420P] = { + [PIX_FMT_YUV422] = { + .convert = yuv420p_to_yuv422, + }, + [PIX_FMT_RGB555] = { + .convert = yuv420p_to_rgb555 + }, + [PIX_FMT_RGB565] = { + .convert = yuv420p_to_rgb565 + }, + [PIX_FMT_BGR24] = { + .convert = yuv420p_to_bgr24 + }, + [PIX_FMT_RGB24] = { + .convert = yuv420p_to_rgb24 + }, + [PIX_FMT_RGBA32] = { + .convert = yuv420p_to_rgba32 + }, + [PIX_FMT_UYVY422] = { + .convert = yuv420p_to_uyvy422, + }, + }, + [PIX_FMT_YUV422P] = { + [PIX_FMT_YUV422] = { + .convert = yuv422p_to_yuv422, + }, + [PIX_FMT_UYVY422] = { + .convert = yuv422p_to_uyvy422, + }, + }, + [PIX_FMT_YUV444P] = { + [PIX_FMT_RGB24] = { + .convert = yuv444p_to_rgb24 + }, + }, + [PIX_FMT_YUVJ420P] = { + [PIX_FMT_RGB555] = { + .convert = yuvj420p_to_rgb555 + }, + [PIX_FMT_RGB565] = { + .convert = yuvj420p_to_rgb565 + }, + [PIX_FMT_BGR24] = { + .convert = yuvj420p_to_bgr24 + }, + [PIX_FMT_RGB24] = { + .convert = yuvj420p_to_rgb24 + }, + [PIX_FMT_RGBA32] = { + .convert = yuvj420p_to_rgba32 + }, + }, + [PIX_FMT_YUVJ444P] = { + [PIX_FMT_RGB24] = { + .convert = yuvj444p_to_rgb24 + }, + }, + [PIX_FMT_YUV422] = { + [PIX_FMT_YUV420P] = { + .convert = yuv422_to_yuv420p, + }, + [PIX_FMT_YUV422P] = { + .convert = yuv422_to_yuv422p, + }, + }, + [PIX_FMT_UYVY422] = { + [PIX_FMT_YUV420P] = { + .convert = uyvy422_to_yuv420p, + }, + [PIX_FMT_YUV422P] = { + .convert = uyvy422_to_yuv422p, + }, + }, + [PIX_FMT_RGB24] = { + [PIX_FMT_YUV420P] = { + .convert = rgb24_to_yuv420p + }, + [PIX_FMT_RGB565] = { + .convert = rgb24_to_rgb565 + }, + [PIX_FMT_RGB555] = { + .convert = rgb24_to_rgb555 + }, + [PIX_FMT_RGBA32] = { + .convert = rgb24_to_rgba32 + }, + [PIX_FMT_BGR24] = { + .convert = rgb24_to_bgr24 + }, + [PIX_FMT_GRAY8] = { + .convert = rgb24_to_gray + }, + [PIX_FMT_PAL8] = { + .convert = rgb24_to_pal8 + }, + [PIX_FMT_YUV444P] = { + .convert = rgb24_to_yuv444p + }, + [PIX_FMT_YUVJ420P] = { + .convert = rgb24_to_yuvj420p + }, + 
[PIX_FMT_YUVJ444P] = { .convert = rgb24_to_yuvj444p },
    },
    [PIX_FMT_RGBA32] = {
        [PIX_FMT_RGB24]   = { .convert = rgba32_to_rgb24 },
        [PIX_FMT_RGB555]  = { .convert = rgba32_to_rgb555 },
        [PIX_FMT_PAL8]    = { .convert = rgba32_to_pal8 },
        [PIX_FMT_YUV420P] = { .convert = rgba32_to_yuv420p },
        [PIX_FMT_GRAY8]   = { .convert = rgba32_to_gray },
    },
    [PIX_FMT_BGR24] = {
        [PIX_FMT_RGB24]   = { .convert = bgr24_to_rgb24 },
        [PIX_FMT_YUV420P] = { .convert = bgr24_to_yuv420p },
        [PIX_FMT_GRAY8]   = { .convert = bgr24_to_gray },
    },
    [PIX_FMT_RGB555] = {
        [PIX_FMT_RGB24]   = { .convert = rgb555_to_rgb24 },
        [PIX_FMT_RGBA32]  = { .convert = rgb555_to_rgba32 },
        [PIX_FMT_YUV420P] = { .convert = rgb555_to_yuv420p },
        [PIX_FMT_GRAY8]   = { .convert = rgb555_to_gray },
    },
    [PIX_FMT_RGB565] = {
        [PIX_FMT_RGB24]   = { .convert = rgb565_to_rgb24 },
        [PIX_FMT_YUV420P] = { .convert = rgb565_to_yuv420p },
        [PIX_FMT_GRAY8]   = { .convert = rgb565_to_gray },
    },
    [PIX_FMT_GRAY8] = {
        [PIX_FMT_RGB555]    = { .convert = gray_to_rgb555 },
        [PIX_FMT_RGB565]    = { .convert = gray_to_rgb565 },
        [PIX_FMT_RGB24]     = { .convert = gray_to_rgb24 },
        [PIX_FMT_BGR24]     = { .convert = gray_to_bgr24 },
        [PIX_FMT_RGBA32]    = { .convert = gray_to_rgba32 },
        [PIX_FMT_MONOWHITE] = { .convert = gray_to_monowhite },
        [PIX_FMT_MONOBLACK] = { .convert = gray_to_monoblack },
    },
    [PIX_FMT_MONOWHITE] = {
        [PIX_FMT_GRAY8]   = { .convert = monowhite_to_gray },
    },
    [PIX_FMT_MONOBLACK] = {
        [PIX_FMT_GRAY8]   = { .convert = monoblack_to_gray },
    },
    [PIX_FMT_PAL8] = {
        [PIX_FMT_RGB555]  = { .convert = pal8_to_rgb555 },
        [PIX_FMT_RGB565]  = { .convert = pal8_to_rgb565 },
        [PIX_FMT_BGR24]   = { .convert = pal8_to_bgr24 },
        [PIX_FMT_RGB24]   = { .convert = pal8_to_rgb24 },
        [PIX_FMT_RGBA32]  = { .convert = pal8_to_rgba32 },
    },
    [PIX_FMT_UYVY411] = {
        [PIX_FMT_YUV411P] = {
            .convert =
uyvy411_to_yuv411p, + }, + }, + +}; + +int avpicture_alloc(AVPicture *picture, + int pix_fmt, int width, int height) +{ + int size; + void *ptr; + + size = avpicture_get_size(pix_fmt, width, height); + if(size<0) + goto fail; + ptr = av_malloc(size); + if (!ptr) + goto fail; + avpicture_fill(picture, ptr, pix_fmt, width, height); + return 0; + fail: + memset(picture, 0, sizeof(AVPicture)); + return -1; +} + +void avpicture_free(AVPicture *picture) +{ + av_free(picture->data[0]); +} + +/* return true if yuv planar */ +static inline int is_yuv_planar(PixFmtInfo *ps) +{ + return (ps->color_type == FF_COLOR_YUV || + ps->color_type == FF_COLOR_YUV_JPEG) && + ps->pixel_type == FF_PIXEL_PLANAR; +} + +/** + * Crop image top and left side + */ +int img_crop(AVPicture *dst, const AVPicture *src, + int pix_fmt, int top_band, int left_band) +{ + int y_shift; + int x_shift; + + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt])) + return -1; + + y_shift = pix_fmt_info[pix_fmt].y_chroma_shift; + x_shift = pix_fmt_info[pix_fmt].x_chroma_shift; + + dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; + dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift); + dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift); + + dst->linesize[0] = src->linesize[0]; + dst->linesize[1] = src->linesize[1]; + dst->linesize[2] = src->linesize[2]; + return 0; +} + +/** + * Pad image + */ +int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, + int padtop, int padbottom, int padleft, int padright, int *color) +{ + uint8_t *optr, *iptr; + int y_shift; + int x_shift; + int yheight; + int i, y; + + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt])) + return -1; + + for (i = 0; i < 3; i++) { + x_shift = i ? pix_fmt_info[pix_fmt].x_chroma_shift : 0; + y_shift = i ? 
pix_fmt_info[pix_fmt].y_chroma_shift : 0; + + if (padtop || padleft) { + memset(dst->data[i], color[i], dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift)); + } + + if (padleft || padright || src) { + if (src) { /* first line */ + iptr = src->data[i]; + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift); + memcpy(optr, iptr, src->linesize[i]); + iptr += src->linesize[i]; + } + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (dst->linesize[i] - (padright >> x_shift)); + yheight = (height - 1 - (padtop + padbottom)) >> y_shift; + for (y = 0; y < yheight; y++) { + memset(optr, color[i], (padleft + padright) >> x_shift); + if (src) { + memcpy(optr + ((padleft + padright) >> x_shift), iptr, src->linesize[i]); + iptr += src->linesize[i]; + } + optr += dst->linesize[i]; + } + } + + if (padbottom || padright) { + optr = dst->data[i] + dst->linesize[i] * ((height - padbottom) >> y_shift) - (padright >> x_shift); + memset(optr, color[i], dst->linesize[i] * (padbottom >> y_shift) + (padright >> x_shift)); + } + } + return 0; +} + +/* XXX: always use linesize. 
Return -1 if not supported */
/**
 * Convert a picture from one pixel format to another at the same size.
 *
 * Tried in order: plain copy for identical formats, a direct converter from
 * convert_table, the gray <-> planar YUV shortcuts, planar YUV chroma
 * resampling, and finally a two-step conversion through an intermediate
 * format.
 *
 * @return 0 on success (also when src_width or src_height is not positive,
 *         in which case nothing is done), -1 on an out-of-range format or
 *         when a conversion step fails
 */
int img_convert(AVPicture *dst, int dst_pix_fmt,
                const AVPicture *src, int src_pix_fmt,
                int src_width, int src_height)
{
    static int inited;
    int i, ret, dst_width, dst_height, int_pix_fmt;
    PixFmtInfo *src_pix, *dst_pix;
    ConvertEntry *ce;
    AVPicture tmp1, *tmp = &tmp1;

    if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB ||
        dst_pix_fmt < 0 || dst_pix_fmt >= PIX_FMT_NB)
        return -1;
    if (src_width <= 0 || src_height <= 0)
        return 0;

    /* build the range-conversion tables on first use */
    if (!inited) {
        inited = 1;
        img_convert_init();
    }

    /* no scaling: the destination has the source dimensions */
    dst_width = src_width;
    dst_height = src_height;

    dst_pix = &pix_fmt_info[dst_pix_fmt];
    src_pix = &pix_fmt_info[src_pix_fmt];
    if (src_pix_fmt == dst_pix_fmt) {
        /* no conversion needed: just copy */
        img_copy(dst, src, dst_pix_fmt, dst_width, dst_height);
        return 0;
    }

    ce = &convert_table[src_pix_fmt][dst_pix_fmt];
    if (ce->convert) {
        /* specific conversion routine */
        ce->convert(dst, src, dst_width, dst_height);
        return 0;
    }

    /* gray to YUV */
    if (is_yuv_planar(dst_pix) &&
        src_pix_fmt == PIX_FMT_GRAY8) {
        int w, h, y;
        uint8_t *d;

        if (dst_pix->color_type == FF_COLOR_YUV_JPEG) {
            /* same range: luma is a straight copy */
            ff_img_copy_plane(dst->data[0], dst->linesize[0],
                     src->data[0], src->linesize[0],
                     dst_width, dst_height);
        } else {
            img_apply_table(dst->data[0], dst->linesize[0],
                            src->data[0], src->linesize[0],
                            dst_width, dst_height,
                            y_jpeg_to_ccir);
        }
        /* fill U and V with 128 */
        w = dst_width >> dst_pix->x_chroma_shift;
        h = dst_height >> dst_pix->y_chroma_shift;
        for(i = 1; i <= 2; i++) {
            d = dst->data[i];
            for(y = 0; y< h; y++) {
                memset(d, 128, w);
                d += dst->linesize[i];
            }
        }
        return 0;
    }

    /* YUV to gray */
    if (is_yuv_planar(src_pix) &&
        dst_pix_fmt == PIX_FMT_GRAY8) {
        if (src_pix->color_type == FF_COLOR_YUV_JPEG) {
            /* same range: luma is a straight copy */
            ff_img_copy_plane(dst->data[0], dst->linesize[0],
                     src->data[0], src->linesize[0],
                     dst_width, dst_height);
        } else {
            img_apply_table(dst->data[0],
dst->linesize[0], + src->data[0], src->linesize[0], + dst_width, dst_height, + y_ccir_to_jpeg); + } + return 0; + } + + /* YUV to YUV planar */ + if (is_yuv_planar(dst_pix) && is_yuv_planar(src_pix)) { + int x_shift, y_shift, w, h, xy_shift; + void (*resize_func)(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height); + + /* compute chroma size of the smallest dimensions */ + w = dst_width; + h = dst_height; + if (dst_pix->x_chroma_shift >= src_pix->x_chroma_shift) + w >>= dst_pix->x_chroma_shift; + else + w >>= src_pix->x_chroma_shift; + if (dst_pix->y_chroma_shift >= src_pix->y_chroma_shift) + h >>= dst_pix->y_chroma_shift; + else + h >>= src_pix->y_chroma_shift; + + x_shift = (dst_pix->x_chroma_shift - src_pix->x_chroma_shift); + y_shift = (dst_pix->y_chroma_shift - src_pix->y_chroma_shift); + xy_shift = ((x_shift & 0xf) << 4) | (y_shift & 0xf); + /* there must be filters for conversion at least from and to + YUV444 format */ + switch(xy_shift) { + case 0x00: + resize_func = ff_img_copy_plane; + break; + case 0x10: + resize_func = shrink21; + break; + case 0x20: + resize_func = shrink41; + break; + case 0x01: + resize_func = shrink12; + break; + case 0x11: + resize_func = ff_shrink22; + break; + case 0x22: + resize_func = ff_shrink44; + break; + case 0xf0: + resize_func = grow21; + break; + case 0xe0: + resize_func = grow41; + break; + case 0xff: + resize_func = grow22; + break; + case 0xee: + resize_func = grow44; + break; + case 0xf1: + resize_func = conv411; + break; + default: + /* currently not handled */ + goto no_chroma_filter; + } + + ff_img_copy_plane(dst->data[0], dst->linesize[0], + src->data[0], src->linesize[0], + dst_width, dst_height); + + for(i = 1;i <= 2; i++) + resize_func(dst->data[i], dst->linesize[i], + src->data[i], src->linesize[i], + dst_width>>dst_pix->x_chroma_shift, dst_height>>dst_pix->y_chroma_shift); + /* if yuv color space conversion is needed, we do it here on + the destination image */ + if 
(dst_pix->color_type != src_pix->color_type) { + const uint8_t *y_table, *c_table; + if (dst_pix->color_type == FF_COLOR_YUV) { + y_table = y_jpeg_to_ccir; + c_table = c_jpeg_to_ccir; + } else { + y_table = y_ccir_to_jpeg; + c_table = c_ccir_to_jpeg; + } + img_apply_table(dst->data[0], dst->linesize[0], + dst->data[0], dst->linesize[0], + dst_width, dst_height, + y_table); + + for(i = 1;i <= 2; i++) + img_apply_table(dst->data[i], dst->linesize[i], + dst->data[i], dst->linesize[i], + dst_width>>dst_pix->x_chroma_shift, + dst_height>>dst_pix->y_chroma_shift, + c_table); + } + return 0; + } + no_chroma_filter: + + /* try to use an intermediate format */ + if (src_pix_fmt == PIX_FMT_YUV422 || + dst_pix_fmt == PIX_FMT_YUV422) { + /* specific case: convert to YUV422P first */ + int_pix_fmt = PIX_FMT_YUV422P; + } else if (src_pix_fmt == PIX_FMT_UYVY422 || + dst_pix_fmt == PIX_FMT_UYVY422) { + /* specific case: convert to YUV422P first */ + int_pix_fmt = PIX_FMT_YUV422P; + } else if (src_pix_fmt == PIX_FMT_UYVY411 || + dst_pix_fmt == PIX_FMT_UYVY411) { + /* specific case: convert to YUV411P first */ + int_pix_fmt = PIX_FMT_YUV411P; + } else if ((src_pix->color_type == FF_COLOR_GRAY && + src_pix_fmt != PIX_FMT_GRAY8) || + (dst_pix->color_type == FF_COLOR_GRAY && + dst_pix_fmt != PIX_FMT_GRAY8)) { + /* gray8 is the normalized format */ + int_pix_fmt = PIX_FMT_GRAY8; + } else if ((is_yuv_planar(src_pix) && + src_pix_fmt != PIX_FMT_YUV444P && + src_pix_fmt != PIX_FMT_YUVJ444P)) { + /* yuv444 is the normalized format */ + if (src_pix->color_type == FF_COLOR_YUV_JPEG) + int_pix_fmt = PIX_FMT_YUVJ444P; + else + int_pix_fmt = PIX_FMT_YUV444P; + } else if ((is_yuv_planar(dst_pix) && + dst_pix_fmt != PIX_FMT_YUV444P && + dst_pix_fmt != PIX_FMT_YUVJ444P)) { + /* yuv444 is the normalized format */ + if (dst_pix->color_type == FF_COLOR_YUV_JPEG) + int_pix_fmt = PIX_FMT_YUVJ444P; + else + int_pix_fmt = PIX_FMT_YUV444P; + } else { + /* the two formats are rgb or gray8 or yuv[j]444p */ + 
if (src_pix->is_alpha && dst_pix->is_alpha) + int_pix_fmt = PIX_FMT_RGBA32; + else + int_pix_fmt = PIX_FMT_RGB24; + } + if (avpicture_alloc(tmp, int_pix_fmt, dst_width, dst_height) < 0) + return -1; + ret = -1; + if (img_convert(tmp, int_pix_fmt, + src, src_pix_fmt, src_width, src_height) < 0) + goto fail1; + if (img_convert(dst, dst_pix_fmt, + tmp, int_pix_fmt, dst_width, dst_height) < 0) + goto fail1; + ret = 0; + fail1: + avpicture_free(tmp); + return ret; +} + +/* NOTE: we scan all the pixels to have an exact information */ +static int get_alpha_info_pal8(const AVPicture *src, int width, int height) +{ + const unsigned char *p; + int src_wrap, ret, x, y; + unsigned int a; + uint32_t *palette = (uint32_t *)src->data[1]; + + p = src->data[0]; + src_wrap = src->linesize[0] - width; + ret = 0; + for(y=0;y> 24; + if (a == 0x00) { + ret |= FF_ALPHA_TRANSP; + } else if (a != 0xff) { + ret |= FF_ALPHA_SEMI_TRANSP; + } + p++; + } + p += src_wrap; + } + return ret; +} + +/** + * Tell if an image really has transparent alpha values. 
+ * @return ored mask of FF_ALPHA_xxx constants + */ +int img_get_alpha_info(const AVPicture *src, + int pix_fmt, int width, int height) +{ + PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; + int ret; + + pf = &pix_fmt_info[pix_fmt]; + /* no alpha can be represented in format */ + if (!pf->is_alpha) + return 0; + switch(pix_fmt) { + case PIX_FMT_RGBA32: + ret = get_alpha_info_rgba32(src, width, height); + break; + case PIX_FMT_RGB555: + ret = get_alpha_info_rgb555(src, width, height); + break; + case PIX_FMT_PAL8: + ret = get_alpha_info_pal8(src, width, height); + break; + default: + /* we do not know, so everything is indicated */ + ret = FF_ALPHA_TRANSP | FF_ALPHA_SEMI_TRANSP; + break; + } + return ret; +} + +#ifdef HAVE_MMX +#define DEINT_INPLACE_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + movd_r2m(mm2,lum_m4[0]);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,lum_m2[0]); + +#define DEINT_LINE_LUM \ + movd_m2r(lum_m4[0],mm0);\ + movd_m2r(lum_m3[0],mm1);\ + movd_m2r(lum_m2[0],mm2);\ + movd_m2r(lum_m1[0],mm3);\ + movd_m2r(lum[0],mm4);\ + punpcklbw_r2r(mm7,mm0);\ + punpcklbw_r2r(mm7,mm1);\ + punpcklbw_r2r(mm7,mm2);\ + punpcklbw_r2r(mm7,mm3);\ + punpcklbw_r2r(mm7,mm4);\ + paddw_r2r(mm3,mm1);\ + psllw_i2r(1,mm2);\ + paddw_r2r(mm4,mm0);\ + psllw_i2r(2,mm1);\ + paddw_r2r(mm6,mm2);\ + paddw_r2r(mm2,mm1);\ + psubusw_r2r(mm0,mm1);\ + psrlw_i2r(3,mm1);\ + packuswb_r2r(mm7,mm1);\ + movd_r2m(mm1,dst[0]); +#endif + +/* filter parameters: [-1 4 2 4 -1] // 8 */ +static void deinterlace_line(uint8_t *dst, + const uint8_t *lum_m4, const uint8_t *lum_m3, + const uint8_t *lum_m2, 
const uint8_t *lum_m1, + const uint8_t *lum, + int size) +{ +#ifndef HAVE_MMX + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + sum += lum_m1[0] << 2; + sum += -lum[0]; + dst[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + dst++; + } +#else + + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } + for (;size > 3; size-=4) { + DEINT_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + dst+=4; + } +#endif +} +static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum, + int size) +{ +#ifndef HAVE_MMX + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int sum; + + for(;size > 0;size--) { + sum = -lum_m4[0]; + sum += lum_m3[0] << 2; + sum += lum_m2[0] << 1; + lum_m4[0]=lum_m2[0]; + sum += lum_m1[0] << 2; + sum += -lum[0]; + lum_m2[0] = cm[(sum + 4) >> 3]; + lum_m4++; + lum_m3++; + lum_m2++; + lum_m1++; + lum++; + } +#else + + { + mmx_t rounder; + rounder.uw[0]=4; + rounder.uw[1]=4; + rounder.uw[2]=4; + rounder.uw[3]=4; + pxor_r2r(mm7,mm7); + movq_m2r(rounder,mm6); + } + for (;size > 3; size-=4) { + DEINT_INPLACE_LINE_LUM + lum_m4+=4; + lum_m3+=4; + lum_m2+=4; + lum_m1+=4; + lum+=4; + } +#endif +} + +/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The + top field is copied as is, but the bottom field is deinterlaced + against the top field. 
*/ +static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap, + const uint8_t *src1, int src_wrap, + int width, int height) +{ + const uint8_t *src_m2, *src_m1, *src_0, *src_p1, *src_p2; + int y; + + src_m2 = src1; + src_m1 = src1; + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + memcpy(dst,src_m1,width); + dst += dst_wrap; + deinterlace_line(dst,src_m2,src_m1,src_0,src_p1,src_p2,width); + src_m2 = src_0; + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 2*src_wrap; + dst += dst_wrap; + } + memcpy(dst,src_m1,width); + dst += dst_wrap; + /* do last line */ + deinterlace_line(dst,src_m2,src_m1,src_0,src_0,src_0,width); +} + +static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap, + int width, int height) +{ + uint8_t *src_m1, *src_0, *src_p1, *src_p2; + int y; + uint8_t *buf; + buf = (uint8_t*)av_malloc(width); + + src_m1 = src1; + memcpy(buf,src_m1,width); + src_0=&src_m1[src_wrap]; + src_p1=&src_0[src_wrap]; + src_p2=&src_p1[src_wrap]; + for(y=0;y<(height-2);y+=2) { + deinterlace_line_inplace(buf,src_m1,src_0,src_p1,src_p2,width); + src_m1 = src_p1; + src_0 = src_p2; + src_p1 += 2*src_wrap; + src_p2 += 2*src_wrap; + } + /* do last line */ + deinterlace_line_inplace(buf,src_m1,src_0,src_0,src_0,width); + av_free(buf); +} + + +/* deinterlace - if not supported return -1 */ +int avpicture_deinterlace(AVPicture *dst, const AVPicture *src, + int pix_fmt, int width, int height) +{ + int i; + + if (pix_fmt != PIX_FMT_YUV420P && + pix_fmt != PIX_FMT_YUV422P && + pix_fmt != PIX_FMT_YUV444P && + pix_fmt != PIX_FMT_YUV411P) + return -1; + if ((width & 3) != 0 || (height & 3) != 0) + return -1; + + for(i=0;i<3;i++) { + if (i == 1) { + switch(pix_fmt) { + case PIX_FMT_YUV420P: + width >>= 1; + height >>= 1; + break; + case PIX_FMT_YUV422P: + width >>= 1; + break; + case PIX_FMT_YUV411P: + width >>= 2; + break; + default: + break; + } + } + if (src == dst) { + 
deinterlace_bottom_field_inplace(dst->data[i], dst->linesize[i], + width, height); + } else { + deinterlace_bottom_field(dst->data[i],dst->linesize[i], + src->data[i], src->linesize[i], + width, height); + } + } +#ifdef HAVE_MMX + emms(); +#endif + return 0; +} + +#undef FIX diff --git a/mpeg4/src/libavcodec/imgconvert_template.h b/mpeg4/src/libavcodec/imgconvert_template.h new file mode 100644 index 0000000000000000000000000000000000000000..e58b0cae27cc278c89bd961686bda06a131576b6 --- /dev/null +++ b/mpeg4/src/libavcodec/imgconvert_template.h @@ -0,0 +1,857 @@ +/* + * Templates for image convertion routines + * Copyright (c) 2001, 2002, 2003 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef RGB_OUT +#define RGB_OUT(d, r, g, b) RGBA_OUT(d, r, g, b, 0xff) +#endif + +static void glue(yuv420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr; + uint8_t *d, *d1, *d2; + int w, y, cb, cr, r_add, g_add, b_add, width2; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + unsigned int r, g, b; + + d = dst->data[0]; + y1_ptr = src->data[0]; + cb_ptr = src->data[1]; + cr_ptr = src->data[2]; + width2 = (width + 1) >> 1; + for(;height >= 2; height -= 2) { + d1 = d; + d2 = d + dst->linesize[0]; + y2_ptr = y1_ptr + src->linesize[0]; + for(w = width; w >= 2; w -= 2) { + YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]); + /* output 4 pixels */ + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]); + RGB_OUT(d1 + BPP, r, g, b); + + YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]); + RGB_OUT(d2, r, g, b); + + YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[1]); + RGB_OUT(d2 + BPP, r, g, b); + + d1 += 2 * BPP; + d2 += 2 * BPP; + + y1_ptr += 2; + y2_ptr += 2; + cb_ptr++; + cr_ptr++; + } + /* handle odd width */ + if (w) { + YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]); + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + YUV_TO_RGB2_CCIR(r, g, b, y2_ptr[0]); + RGB_OUT(d2, r, g, b); + d1 += BPP; + d2 += BPP; + y1_ptr++; + y2_ptr++; + cb_ptr++; + cr_ptr++; + } + d += 2 * dst->linesize[0]; + y1_ptr += 2 * src->linesize[0] - width; + cb_ptr += src->linesize[1] - width2; + cr_ptr += src->linesize[2] - width2; + } + /* handle odd height */ + if (height) { + d1 = d; + for(w = width; w >= 2; w -= 2) { + YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]); + /* output 2 pixels */ + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + 
YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[1]); + RGB_OUT(d1 + BPP, r, g, b); + + d1 += 2 * BPP; + + y1_ptr += 2; + cb_ptr++; + cr_ptr++; + } + /* handle width */ + if (w) { + YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]); + /* output 2 pixels */ + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + d1 += BPP; + + y1_ptr++; + cb_ptr++; + cr_ptr++; + } + } +} + +static void glue(yuvj420p_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *y1_ptr, *y2_ptr, *cb_ptr, *cr_ptr; + uint8_t *d, *d1, *d2; + int w, y, cb, cr, r_add, g_add, b_add, width2; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + unsigned int r, g, b; + + d = dst->data[0]; + y1_ptr = src->data[0]; + cb_ptr = src->data[1]; + cr_ptr = src->data[2]; + width2 = (width + 1) >> 1; + for(;height >= 2; height -= 2) { + d1 = d; + d2 = d + dst->linesize[0]; + y2_ptr = y1_ptr + src->linesize[0]; + for(w = width; w >= 2; w -= 2) { + YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]); + /* output 4 pixels */ + YUV_TO_RGB2(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + YUV_TO_RGB2(r, g, b, y1_ptr[1]); + RGB_OUT(d1 + BPP, r, g, b); + + YUV_TO_RGB2(r, g, b, y2_ptr[0]); + RGB_OUT(d2, r, g, b); + + YUV_TO_RGB2(r, g, b, y2_ptr[1]); + RGB_OUT(d2 + BPP, r, g, b); + + d1 += 2 * BPP; + d2 += 2 * BPP; + + y1_ptr += 2; + y2_ptr += 2; + cb_ptr++; + cr_ptr++; + } + /* handle odd width */ + if (w) { + YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]); + YUV_TO_RGB2(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + YUV_TO_RGB2(r, g, b, y2_ptr[0]); + RGB_OUT(d2, r, g, b); + d1 += BPP; + d2 += BPP; + y1_ptr++; + y2_ptr++; + cb_ptr++; + cr_ptr++; + } + d += 2 * dst->linesize[0]; + y1_ptr += 2 * src->linesize[0] - width; + cb_ptr += src->linesize[1] - width2; + cr_ptr += src->linesize[2] - width2; + } + /* handle odd height */ + if (height) { + d1 = d; + for(w = width; w >= 2; w -= 2) { + YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]); + /* output 2 pixels */ + YUV_TO_RGB2(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + + YUV_TO_RGB2(r, g, b, 
y1_ptr[1]); + RGB_OUT(d1 + BPP, r, g, b); + + d1 += 2 * BPP; + + y1_ptr += 2; + cb_ptr++; + cr_ptr++; + } + /* handle width */ + if (w) { + YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]); + /* output 2 pixels */ + YUV_TO_RGB2(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + d1 += BPP; + + y1_ptr++; + cb_ptr++; + cr_ptr++; + } + } +} + +static void glue(RGB_NAME, _to_yuv420p)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int wrap, wrap3, width2; + int r, g, b, r1, g1, b1, w; + uint8_t *lum, *cb, *cr; + const uint8_t *p; + + lum = dst->data[0]; + cb = dst->data[1]; + cr = dst->data[2]; + + width2 = (width + 1) >> 1; + wrap = dst->linesize[0]; + wrap3 = src->linesize[0]; + p = src->data[0]; + for(;height>=2;height -= 2) { + for(w = width; w >= 2; w -= 2) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y_CCIR(r, g, b); + + RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y_CCIR(r, g, b); + p += wrap3; + lum += wrap; + + RGB_IN(r, g, b, p); + r1 += r; + g1 += g; + b1 += b; + lum[0] = RGB_TO_Y_CCIR(r, g, b); + + RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y_CCIR(r, g, b); + + cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 2); + cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 2); + + cb++; + cr++; + p += -wrap3 + 2 * BPP; + lum += -wrap + 2; + } + if (w) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y_CCIR(r, g, b); + p += wrap3; + lum += wrap; + RGB_IN(r, g, b, p); + r1 += r; + g1 += g; + b1 += b; + lum[0] = RGB_TO_Y_CCIR(r, g, b); + cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1); + cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1); + cb++; + cr++; + p += -wrap3 + BPP; + lum += -wrap + 1; + } + p += wrap3 + (wrap3 - width * BPP); + lum += wrap + (wrap - width); + cb += dst->linesize[1] - width2; + cr += dst->linesize[2] - width2; + } + /* handle odd height */ + if (height) { + for(w = width; w >= 2; w -= 2) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y_CCIR(r, g, b); + + 
RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y_CCIR(r, g, b); + cb[0] = RGB_TO_U_CCIR(r1, g1, b1, 1); + cr[0] = RGB_TO_V_CCIR(r1, g1, b1, 1); + cb++; + cr++; + p += 2 * BPP; + lum += 2; + } + if (w) { + RGB_IN(r, g, b, p); + lum[0] = RGB_TO_Y_CCIR(r, g, b); + cb[0] = RGB_TO_U_CCIR(r, g, b, 0); + cr[0] = RGB_TO_V_CCIR(r, g, b, 0); + } + } +} + +static void glue(RGB_NAME, _to_gray)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const unsigned char *p; + unsigned char *q; + int r, g, b, dst_wrap, src_wrap; + int x, y; + + p = src->data[0]; + src_wrap = src->linesize[0] - BPP * width; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - width; + + for(y=0;ydata[0]; + src_wrap = src->linesize[0] - width; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - BPP * width; + + for(y=0;ydata[0]; + src_wrap = src->linesize[0] - width; + palette = (uint32_t *)src->data[1]; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - BPP * width; + + for(y=0;y> 16) & 0xff; + g = (v >> 8) & 0xff; + b = (v) & 0xff; +#ifdef RGBA_OUT + { + int a; + a = (v >> 24) & 0xff; + RGBA_OUT(q, r, g, b, a); + } +#else + RGB_OUT(q, r, g, b); +#endif + q += BPP; + p ++; + } + p += src_wrap; + q += dst_wrap; + } +} + +#if !defined(FMT_RGBA32) && defined(RGBA_OUT) +/* alpha support */ + +static void glue(rgba32_to_, RGB_NAME)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *s; + uint8_t *d; + int src_wrap, dst_wrap, j, y; + unsigned int v, r, g, b, a; + + s = src->data[0]; + src_wrap = src->linesize[0] - width * 4; + + d = dst->data[0]; + dst_wrap = dst->linesize[0] - width * BPP; + + for(y=0;y> 24) & 0xff; + r = (v >> 16) & 0xff; + g = (v >> 8) & 0xff; + b = v & 0xff; + RGBA_OUT(d, r, g, b, a); + s += 4; + d += BPP; + } + s += src_wrap; + d += dst_wrap; + } +} + +static void glue(RGB_NAME, _to_rgba32)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *s; + uint8_t *d; + int src_wrap, 
dst_wrap, j, y; + unsigned int r, g, b, a; + + s = src->data[0]; + src_wrap = src->linesize[0] - width * BPP; + + d = dst->data[0]; + dst_wrap = dst->linesize[0] - width * 4; + + for(y=0;ydata[0]; + src_wrap = src->linesize[0] - width * 3; + + d = dst->data[0]; + dst_wrap = dst->linesize[0] - width * BPP; + + for(y=0;ydata[0]; + src_wrap = src->linesize[0] - width * BPP; + + d = dst->data[0]; + dst_wrap = dst->linesize[0] - width * 3; + + for(y=0;ydata[0]; + y1_ptr = src->data[0]; + cb_ptr = src->data[1]; + cr_ptr = src->data[2]; + for(;height > 0; height --) { + d1 = d; + for(w = width; w > 0; w--) { + YUV_TO_RGB1_CCIR(cb_ptr[0], cr_ptr[0]); + + YUV_TO_RGB2_CCIR(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + d1 += BPP; + + y1_ptr++; + cb_ptr++; + cr_ptr++; + } + d += dst->linesize[0]; + y1_ptr += src->linesize[0] - width; + cb_ptr += src->linesize[1] - width; + cr_ptr += src->linesize[2] - width; + } +} + +static void yuvj444p_to_rgb24(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *y1_ptr, *cb_ptr, *cr_ptr; + uint8_t *d, *d1; + int w, y, cb, cr, r_add, g_add, b_add; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + unsigned int r, g, b; + + d = dst->data[0]; + y1_ptr = src->data[0]; + cb_ptr = src->data[1]; + cr_ptr = src->data[2]; + for(;height > 0; height --) { + d1 = d; + for(w = width; w > 0; w--) { + YUV_TO_RGB1(cb_ptr[0], cr_ptr[0]); + + YUV_TO_RGB2(r, g, b, y1_ptr[0]); + RGB_OUT(d1, r, g, b); + d1 += BPP; + + y1_ptr++; + cb_ptr++; + cr_ptr++; + } + d += dst->linesize[0]; + y1_ptr += src->linesize[0] - width; + cb_ptr += src->linesize[1] - width; + cr_ptr += src->linesize[2] - width; + } +} + +static void rgb24_to_yuv444p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int src_wrap, x, y; + int r, g, b; + uint8_t *lum, *cb, *cr; + const uint8_t *p; + + lum = dst->data[0]; + cb = dst->data[1]; + cr = dst->data[2]; + + src_wrap = src->linesize[0] - width * BPP; + p = src->data[0]; + for(y=0;ylinesize[0] - width; 
+ cb += dst->linesize[1] - width; + cr += dst->linesize[2] - width; + } +} + +static void rgb24_to_yuvj420p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int wrap, wrap3, width2; + int r, g, b, r1, g1, b1, w; + uint8_t *lum, *cb, *cr; + const uint8_t *p; + + lum = dst->data[0]; + cb = dst->data[1]; + cr = dst->data[2]; + + width2 = (width + 1) >> 1; + wrap = dst->linesize[0]; + wrap3 = src->linesize[0]; + p = src->data[0]; + for(;height>=2;height -= 2) { + for(w = width; w >= 2; w -= 2) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y(r, g, b); + + RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y(r, g, b); + p += wrap3; + lum += wrap; + + RGB_IN(r, g, b, p); + r1 += r; + g1 += g; + b1 += b; + lum[0] = RGB_TO_Y(r, g, b); + + RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y(r, g, b); + + cb[0] = RGB_TO_U(r1, g1, b1, 2); + cr[0] = RGB_TO_V(r1, g1, b1, 2); + + cb++; + cr++; + p += -wrap3 + 2 * BPP; + lum += -wrap + 2; + } + if (w) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y(r, g, b); + p += wrap3; + lum += wrap; + RGB_IN(r, g, b, p); + r1 += r; + g1 += g; + b1 += b; + lum[0] = RGB_TO_Y(r, g, b); + cb[0] = RGB_TO_U(r1, g1, b1, 1); + cr[0] = RGB_TO_V(r1, g1, b1, 1); + cb++; + cr++; + p += -wrap3 + BPP; + lum += -wrap + 1; + } + p += wrap3 + (wrap3 - width * BPP); + lum += wrap + (wrap - width); + cb += dst->linesize[1] - width2; + cr += dst->linesize[2] - width2; + } + /* handle odd height */ + if (height) { + for(w = width; w >= 2; w -= 2) { + RGB_IN(r, g, b, p); + r1 = r; + g1 = g; + b1 = b; + lum[0] = RGB_TO_Y(r, g, b); + + RGB_IN(r, g, b, p + BPP); + r1 += r; + g1 += g; + b1 += b; + lum[1] = RGB_TO_Y(r, g, b); + cb[0] = RGB_TO_U(r1, g1, b1, 1); + cr[0] = RGB_TO_V(r1, g1, b1, 1); + cb++; + cr++; + p += 2 * BPP; + lum += 2; + } + if (w) { + RGB_IN(r, g, b, p); + lum[0] = RGB_TO_Y(r, g, b); + cb[0] = RGB_TO_U(r, g, b, 0); + cr[0] = 
RGB_TO_V(r, g, b, 0); + } + } +} + +static void rgb24_to_yuvj444p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int src_wrap, x, y; + int r, g, b; + uint8_t *lum, *cb, *cr; + const uint8_t *p; + + lum = dst->data[0]; + cb = dst->data[1]; + cr = dst->data[2]; + + src_wrap = src->linesize[0] - width * BPP; + p = src->data[0]; + for(y=0;ylinesize[0] - width; + cb += dst->linesize[1] - width; + cr += dst->linesize[2] - width; + } +} + +#endif /* FMT_RGB24 */ + +#if defined(FMT_RGB24) || defined(FMT_RGBA32) + +static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const unsigned char *p; + unsigned char *q; + int dst_wrap, src_wrap; + int x, y, has_alpha; + unsigned int r, g, b; + + p = src->data[0]; + src_wrap = src->linesize[0] - BPP * width; + + q = dst->data[0]; + dst_wrap = dst->linesize[0] - width; + has_alpha = 0; + + for(y=0;ydata[1], has_alpha); +} + +#endif /* defined(FMT_RGB24) || defined(FMT_RGBA32) */ + +#ifdef RGBA_IN + +static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src, + int width, int height) +{ + const unsigned char *p; + int src_wrap, ret, x, y; + unsigned int r, g, b, a; + + p = src->data[0]; + src_wrap = src->linesize[0] - BPP * width; + ret = 0; + for(y=0;y> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); +} + +/* This function must be optimized */ +static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, + int src_width, int src_start, int src_incr, + int16_t *filters) +{ + int src_pos, phase, sum, i; + const uint8_t *s; + int16_t *filter; + + src_pos = src_start; + for(i=0;i> POS_FRAC_BITS) < 0 || + (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) + av_abort(); +#endif + s = src + (src_pos >> POS_FRAC_BITS); + phase = get_phase(src_pos); + filter = filters + phase * NB_TAPS; +#if NB_TAPS == 4 + sum = s[0] * filter[0] + + s[1] * filter[1] + + s[2] * filter[2] + + s[3] * filter[3]; +#else + { + int j; + sum = 0; + for(j=0;j> 
FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + src_pos += src_incr; + dst++; + } +} + +/* This function must be optimized */ +static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, + int wrap, int16_t *filter) +{ + int sum, i; + const uint8_t *s; + + s = src; + for(i=0;i> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + dst++; + s++; + } +} + +#ifdef HAVE_MMX + +#include "i386/mmx.h" + +#define FILTER4(reg) \ +{\ + s = src + (src_pos >> POS_FRAC_BITS);\ + phase = get_phase(src_pos);\ + filter = filters + phase * NB_TAPS;\ + movq_m2r(*s, reg);\ + punpcklbw_r2r(mm7, reg);\ + movq_m2r(*filter, mm6);\ + pmaddwd_r2r(reg, mm6);\ + movq_r2r(mm6, reg);\ + psrlq_i2r(32, reg);\ + paddd_r2r(mm6, reg);\ + psrad_i2r(FILTER_BITS, reg);\ + src_pos += src_incr;\ +} + +#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); + +/* XXX: do four pixels at a time */ +static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, + const uint8_t *src, int src_width, + int src_start, int src_incr, int16_t *filters) +{ + int src_pos, phase; + const uint8_t *s; + int16_t *filter; + mmx_t tmp; + + src_pos = src_start; + pxor_r2r(mm7, mm7); + + while (dst_width >= 4) { + + FILTER4(mm0); + FILTER4(mm1); + FILTER4(mm2); + FILTER4(mm3); + + packuswb_r2r(mm7, mm0); + packuswb_r2r(mm7, mm1); + packuswb_r2r(mm7, mm3); + packuswb_r2r(mm7, mm2); + movq_r2m(mm0, tmp); + dst[0] = tmp.ub[0]; + movq_r2m(mm1, tmp); + dst[1] = tmp.ub[0]; + movq_r2m(mm2, tmp); + dst[2] = tmp.ub[0]; + movq_r2m(mm3, tmp); + dst[3] = tmp.ub[0]; + dst += 4; + dst_width -= 4; + } + while (dst_width > 0) { + FILTER4(mm0); + packuswb_r2r(mm7, mm0); + movq_r2m(mm0, tmp); + dst[0] = tmp.ub[0]; + dst++; + dst_width--; + } + emms(); +} + +static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, + int wrap, int16_t *filter) +{ + int sum, i, v; + const uint8_t *s; + mmx_t tmp; + mmx_t coefs[4]; + + 
for(i=0;i<4;i++) { + v = filter[i]; + coefs[i].uw[0] = v; + coefs[i].uw[1] = v; + coefs[i].uw[2] = v; + coefs[i].uw[3] = v; + } + + pxor_r2r(mm7, mm7); + s = src; + while (dst_width >= 4) { + movq_m2r(s[0 * wrap], mm0); + punpcklbw_r2r(mm7, mm0); + movq_m2r(s[1 * wrap], mm1); + punpcklbw_r2r(mm7, mm1); + movq_m2r(s[2 * wrap], mm2); + punpcklbw_r2r(mm7, mm2); + movq_m2r(s[3 * wrap], mm3); + punpcklbw_r2r(mm7, mm3); + + pmullw_m2r(coefs[0], mm0); + pmullw_m2r(coefs[1], mm1); + pmullw_m2r(coefs[2], mm2); + pmullw_m2r(coefs[3], mm3); + + paddw_r2r(mm1, mm0); + paddw_r2r(mm3, mm2); + paddw_r2r(mm2, mm0); + psraw_i2r(FILTER_BITS, mm0); + + packuswb_r2r(mm7, mm0); + movq_r2m(mm0, tmp); + + *(uint32_t *)dst = tmp.ud[0]; + dst += 4; + s += 4; + dst_width -= 4; + } + while (dst_width > 0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + dst++; + s++; + dst_width--; + } + emms(); +} +#endif + +#ifdef HAVE_ALTIVEC +typedef union { + vector unsigned char v; + unsigned char c[16]; +} vec_uc_t; + +typedef union { + vector signed short v; + signed short s[8]; +} vec_ss_t; + +void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, + int wrap, int16_t *filter) +{ + int sum, i; + const uint8_t *s; + vector unsigned char *tv, tmp, dstv, zero; + vec_ss_t srchv[4], srclv[4], fv[4]; + vector signed short zeros, sumhv, sumlv; + s = src; + + for(i=0;i<4;i++) + { + /* + The vec_madds later on does an implicit >>15 on the result. + Since FILTER_BITS is 8, and we have 15 bits of magnitude in + a signed short, we have just enough bits to pre-shift our + filter constants <<7 to compensate for vec_madds. 
+ */ + fv[i].s[0] = filter[i] << (15-FILTER_BITS); + fv[i].v = vec_splat(fv[i].v, 0); + } + + zero = vec_splat_u8(0); + zeros = vec_splat_s16(0); + + + /* + When we're resampling, we'd ideally like both our input buffers, + and output buffers to be 16-byte aligned, so we can do both aligned + reads and writes. Sadly we can't always have this at the moment, so + we opt for aligned writes, as unaligned writes have a huge overhead. + To do this, do enough scalar resamples to get dst 16-byte aligned. + */ + i = (-(int)dst) & 0xf; + while(i>0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum<0) sum = 0; else if (sum>255) sum=255; + dst[0] = sum; + dst++; + s++; + dst_width--; + i--; + } + + /* Do our altivec resampling on 16 pixels at once. */ + while(dst_width>=16) { + /* + Read 16 (potentially unaligned) bytes from each of + 4 lines into 4 vectors, and split them into shorts. + Interleave the multipy/accumulate for the resample + filter with the loads to hide the 3 cycle latency + the vec_madds have. 
+ */ + tv = (vector unsigned char *) &s[0 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); + srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[0].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); + sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); + + tv = (vector unsigned char *) &s[1 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); + srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[1].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); + sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); + + tv = (vector unsigned char *) &s[2 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); + srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[2].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); + sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); + + tv = (vector unsigned char *) &s[3 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); + srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[3].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); + sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); + + /* + Pack the results into our destination vector, + and do an aligned write of that back to memory. + */ + dstv = vec_packsu(sumhv, sumlv) ; + vec_st(dstv, 0, (vector unsigned char *) dst); + + dst+=16; + s+=16; + dst_width-=16; + } + + /* + If there are any leftover pixels, resample them + with the slow scalar method. + */ + while(dst_width>0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum<0) sum = 0; else if (sum>255) sum=255; + dst[0] = sum; + dst++; + s++; + dst_width--; + } +} +#endif + +/* slow version to handle limit cases. 
Does not need optimisation */ +static void h_resample_slow(uint8_t *dst, int dst_width, + const uint8_t *src, int src_width, + int src_start, int src_incr, int16_t *filters) +{ + int src_pos, phase, sum, j, v, i; + const uint8_t *s, *src_end; + int16_t *filter; + + src_end = src + src_width; + src_pos = src_start; + for(i=0;i> POS_FRAC_BITS); + phase = get_phase(src_pos); + filter = filters + phase * NB_TAPS; + sum = 0; + for(j=0;j= src_end) + v = src_end[-1]; + else + v = s[0]; + sum += v * filter[j]; + s++; + } + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + src_pos += src_incr; + dst++; + } +} + +static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, + int src_width, int src_start, int src_incr, + int16_t *filters) +{ + int n, src_end; + + if (src_start < 0) { + n = (0 - src_start + src_incr - 1) / src_incr; + h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); + dst += n; + dst_width -= n; + src_start += n * src_incr; + } + src_end = src_start + dst_width * src_incr; + if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) { + n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / + src_incr; + } else { + n = dst_width; + } +#ifdef HAVE_MMX + if ((mm_flags & MM_MMX) && NB_TAPS == 4) + h_resample_fast4_mmx(dst, n, + src, src_width, src_start, src_incr, filters); + else +#endif + h_resample_fast(dst, n, + src, src_width, src_start, src_incr, filters); + if (n < dst_width) { + dst += n; + dst_width -= n; + src_start += n * src_incr; + h_resample_slow(dst, dst_width, + src, src_width, src_start, src_incr, filters); + } +} + +static void component_resample(ImgReSampleContext *s, + uint8_t *output, int owrap, int owidth, int oheight, + uint8_t *input, int iwrap, int iwidth, int iheight) +{ + int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y; + uint8_t *new_line, *src_line; + + last_src_y = - FCENTER - 1; + /* position of the bottom of the filter in the 
source image */ + src_y = (last_src_y + NB_TAPS) * POS_FRAC; + ring_y = NB_TAPS; /* position in ring buffer */ + for(y=0;y> POS_FRAC_BITS; + while (last_src_y < src_y1) { + if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS) + ring_y = NB_TAPS; + last_src_y++; + /* handle limit conditions : replicate line (slightly + inefficient because we filter multiple times) */ + y1 = last_src_y; + if (y1 < 0) { + y1 = 0; + } else if (y1 >= iheight) { + y1 = iheight - 1; + } + src_line = input + y1 * iwrap; + new_line = s->line_buf + ring_y * owidth; + /* apply filter and handle limit cases correctly */ + h_resample(new_line, owidth, + src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, + &s->h_filters[0][0]); + /* handle ring buffer wraping */ + if (ring_y >= LINE_BUF_HEIGHT) { + memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, + new_line, owidth); + } + } + /* apply vertical filter */ + phase_y = get_phase(src_y); +#ifdef HAVE_MMX + /* desactivated MMX because loss of precision */ + if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) + v_resample4_mmx(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + else +#endif +#ifdef HAVE_ALTIVEC + if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6) + v_resample16_altivec(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + else +#endif + v_resample(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + + src_y += s->v_incr; + + output += owrap; + } +} + +ImgReSampleContext *img_resample_init(int owidth, int oheight, + int iwidth, int iheight) +{ + return img_resample_full_init(owidth, oheight, iwidth, iheight, + 0, 0, 0, 0, 0, 0, 0, 0); +} + +ImgReSampleContext *img_resample_full_init(int owidth, int oheight, + int iwidth, int iheight, + int topBand, int bottomBand, + int leftBand, int rightBand, + int padtop, int padbottom, + int padleft, int padright) +{ + ImgReSampleContext 
*s; + + if (!owidth || !oheight || !iwidth || !iheight) + return NULL; + + s = av_mallocz(sizeof(ImgReSampleContext)); + if (!s) + return NULL; + if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS)) + return NULL; + s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); + if (!s->line_buf) + goto fail; + + s->owidth = owidth; + s->oheight = oheight; + s->iwidth = iwidth; + s->iheight = iheight; + + s->topBand = topBand; + s->bottomBand = bottomBand; + s->leftBand = leftBand; + s->rightBand = rightBand; + + s->padtop = padtop; + s->padbottom = padbottom; + s->padleft = padleft; + s->padright = padright; + + s->pad_owidth = owidth - (padleft + padright); + s->pad_oheight = oheight - (padtop + padbottom); + + s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth; + s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; + + av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth / + (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<v_filters[0][0], (float) s->pad_oheight / + (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<data[i] + (((output->linesize[i] * + s->padtop) + s->padleft) >> shift); + + component_resample(s, optr, output->linesize[i], + s->pad_owidth >> shift, s->pad_oheight >> shift, + input->data[i] + (input->linesize[i] * + (s->topBand >> shift)) + (s->leftBand >> shift), + input->linesize[i], ((s->iwidth - s->leftBand - + s->rightBand) >> shift), + (s->iheight - s->topBand - s->bottomBand) >> shift); + } +} +

/**
 * Release a resampling context: frees its line buffer, then the context.
 *
 * @param s context to free; it is dereferenced, so it must not be NULL
 */
void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

/**
 * Create a scaling/conversion context on top of the img_resample API.
 *
 * When the source and destination sizes differ, a real resampler is created
 * with img_resample_init(). When they are equal, a bare ImgReSampleContext is
 * allocated and only its four dimension fields are filled in; the other
 * functions in this file compare those fields to tell the two cases apart.
 *
 * flags, srcFilter, dstFilter and param are accepted but not used here.
 *
 * @return the new context, or NULL if any allocation fails
 */
struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
                                  int dstW, int dstH, int dstFormat,
                                  int flags, SwsFilter *srcFilter,
                                  SwsFilter *dstFilter, double *param)
{
    struct SwsContext *ctx;

    ctx = av_malloc(sizeof(struct SwsContext));
    if (ctx == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");

        return NULL;
    }

    if ((srcH != dstH) || (srcW != dstW)) {
        if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) {
            av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n");
        }
        ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH);
    } else {
        ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext));
        if (ctx->resampling_ctx != NULL) {
            ctx->resampling_ctx->iheight = srcH;
            ctx->resampling_ctx->iwidth  = srcW;
            ctx->resampling_ctx->oheight = dstH;
            ctx->resampling_ctx->owidth  = dstW;
        }
    }

    /* Both branches can fail; never hand out a context whose resampling_ctx
     * is NULL, because sws_scale() and sws_freeContext() dereference it. */
    if (ctx->resampling_ctx == NULL) {
        av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
        av_free(ctx);
        return NULL;
    }

    ctx->src_pix_fmt = srcFormat;
    ctx->dst_pix_fmt = dstFormat;

    return ctx;
}

/**
 * Free a context obtained from sws_getContext().
 *
 * A context created for rescaling owns a full resampler, which is released
 * with img_resample_close(); otherwise the bare dimension holder is freed
 * directly. A NULL ctx is accepted and ignored.
 */
void sws_freeContext(struct SwsContext *ctx)
{
    if (ctx == NULL)
        return;

    if (ctx->resampling_ctx != NULL) {
        if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
            (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
            img_resample_close(ctx->resampling_ctx);
        } else {
            av_free(ctx->resampling_ctx);
        }
    }
    av_free(ctx);
}

/**
 * Rescale and/or convert one picture.
 *
 * Rescaling is only done in YUV420P, so a non-YUV420P source is first
 * converted into a temporary picture, and a non-YUV420P destination is
 * produced by rescaling into a temporary picture and converting afterwards.
 *
 * srcSliceY and srcSliceH are accepted but not used here. Only the first
 * three planes of src/dst and their strides are read.
 *
 * NOTE(review): when no rescaling is needed and the source and destination
 * pixel formats are equal, no code path below writes to dst - confirm that
 * callers do not rely on a copy in that case.
 *
 * @return 0 on success, -1 on allocation failure or unsupported conversion
 */
int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[],
              int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
    AVPicture src_pict, dst_pict;
    int i, res = 0;
    AVPicture picture_format_temp;
    AVPicture picture_resample_temp, *formatted_picture, *resampled_picture;
    uint8_t *buf1 = NULL, *buf2 = NULL;
    enum PixelFormat current_pix_fmt;

    for (i = 0; i < 3; i++) {
        src_pict.data[i] = src[i];
        src_pict.linesize[i] = srcStride[i];
        dst_pict.data[i] = dst[i];
        dst_pict.linesize[i] = dstStride[i];
    }
    if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) ||
        (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) {
        /* We have to rescale the picture, but only YUV420P rescaling is supported... */

        if (ctx->src_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling input*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);
            buf1 = av_malloc(size);
            if (!buf1) {
                res = -1;
                goto the_end;
            }
            formatted_picture = &picture_format_temp;
            avpicture_fill((AVPicture*)formatted_picture, buf1,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight);

            if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P,
                            &src_pict, ctx->src_pix_fmt,
                            ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) {

                av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");
                res = -1;
                goto the_end;
            }
        } else {
            formatted_picture = &src_pict;
        }

        if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) {
            int size;

            /* create temporary picture for rescaling output*/
            size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);
            buf2 = av_malloc(size);
            if (!buf2) {
                res = -1;
                goto the_end;
            }
            resampled_picture = &picture_resample_temp;
            avpicture_fill((AVPicture*)resampled_picture, buf2,
                           PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight);

        } else {
            resampled_picture = &dst_pict;
        }

        /* ...and finally rescale!!! */
        img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture);
        current_pix_fmt = PIX_FMT_YUV420P;
    } else {
        resampled_picture = &src_pict;
        current_pix_fmt = ctx->src_pix_fmt;
    }

    if (current_pix_fmt != ctx->dst_pix_fmt) {
        if (img_convert(&dst_pict, ctx->dst_pix_fmt,
                        resampled_picture, current_pix_fmt,
                        ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) {

            av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n");

            res = -1;
            goto the_end;
        }
    }

the_end:
    /* buf1/buf2 are still NULL on paths that never allocated them */
    av_free(buf1);
    av_free(buf2);
    return res;
}


+#ifdef TEST +#include + +/* input */ +#define XSIZE 256 +#define YSIZE 256 +uint8_t img[XSIZE * YSIZE]; + +/* output */ +#define XSIZE1 512 +#define YSIZE1 512 +uint8_t img1[XSIZE1 * YSIZE1]; +uint8_t img2[XSIZE1 * YSIZE1]; + +void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize) +{ +#undef fprintf + FILE *f; + f=fopen(filename,"w"); + fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255); + fwrite(img,1, xsize * ysize,f); + fclose(f); +#define fprintf please_use_av_log +} + +static void dump_filter(int16_t *filter) +{ + int i, ph; + + for(ph=0;phh_filters[0][0]); + component_resample(s, img1, xsize, xsize, ysize, + img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); + img_resample_close(s); + + snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i); + save_pgm(buf, img1, xsize, ysize); + } + + /* mmx test */ +#ifdef HAVE_MMX + av_log(NULL, AV_LOG_INFO, "MMX test\n"); + fact = 0.72; + xsize = (int)(XSIZE * fact); + ysize = (int)(YSIZE * fact); + mm_flags = MM_MMX; + s = img_resample_init(xsize, ysize, XSIZE, YSIZE); + component_resample(s, img1, xsize, xsize, ysize, + img, XSIZE, XSIZE, YSIZE); + + mm_flags = 0; + s = img_resample_init(xsize, ysize, XSIZE, YSIZE); + component_resample(s, img2, xsize, xsize, ysize, + img, XSIZE, XSIZE, YSIZE); + if (memcmp(img1, img2, xsize * ysize) != 0) { + av_log(NULL, AV_LOG_ERROR, "mmx error\n"); + exit(1); + } + av_log(NULL, AV_LOG_INFO, "MMX OK\n"); +#endif 
+ return 0; +} + +#endif diff --git a/mpeg4/src/libavcodec/indeo2.c b/mpeg4/src/libavcodec/indeo2.c new file mode 100644 index 0000000000000000000000000000000000000000..3814e5250f3a7911b34b40285f568d3f17790d08 --- /dev/null +++ b/mpeg4/src/libavcodec/indeo2.c @@ -0,0 +1,219 @@ +/* + * Indel Indeo 2 codec + * Copyright (c) 2005 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file indeo2.c + * Intel Indeo 2 decoder. 
+ */ +#define ALT_BITSTREAM_READER_LE +#include "avcodec.h" +#include "bitstream.h" +#include "indeo2data.h" + +typedef struct Ir2Context{ + AVCodecContext *avctx; + AVFrame picture; + GetBitContext gb; + int decode_delta; +} Ir2Context; + +#define CODE_VLC_BITS 14 +static VLC ir2_vlc; + +/* Indeo 2 codes are in range 0x01..0x7F and 0x81..0x90 */ +static inline int ir2_get_code(GetBitContext *gb) +{ + return get_vlc2(gb, ir2_vlc.table, CODE_VLC_BITS, 1) + 1; +} + +static int ir2_decode_plane(Ir2Context *ctx, int width, int height, uint8_t *dst, int stride, + const uint8_t *table) +{ + int i; + int j; + int out = 0; + int c; + int t; + + if(width&1) + return -1; + + /* first line contain absolute values, other lines contain deltas */ + while (out < width){ + c = ir2_get_code(&ctx->gb); + if(c >= 0x80) { /* we have a run */ + c -= 0x7F; + if(out + c*2 > width) + return -1; + for (i = 0; i < c * 2; i++) + dst[out++] = 0x80; + } else { /* copy two values from table */ + dst[out++] = table[c * 2]; + dst[out++] = table[(c * 2) + 1]; + } + } + dst += stride; + + for (j = 1; j < height; j++){ + out = 0; + while (out < width){ + c = ir2_get_code(&ctx->gb); + if(c >= 0x80) { /* we have a skip */ + c -= 0x7F; + if(out + c*2 > width) + return -1; + for (i = 0; i < c * 2; i++) { + dst[out] = dst[out - stride]; + out++; + } + } else { /* add two deltas from table */ + t = dst[out - stride] + (table[c * 2] - 128); + t= clip_uint8(t); + dst[out] = t; + out++; + t = dst[out - stride] + (table[(c * 2) + 1] - 128); + t= clip_uint8(t); + dst[out] = t; + out++; + } + } + dst += stride; + } + return 0; +} + +static int ir2_decode_plane_inter(Ir2Context *ctx, int width, int height, uint8_t *dst, int stride, + const uint8_t *table) +{ + int j; + int out = 0; + int c; + int t; + + if(width&1) + return -1; + + for (j = 0; j < height; j++){ + out = 0; + while (out < width){ + c = ir2_get_code(&ctx->gb); + if(c >= 0x80) { /* we have a skip */ + c -= 0x7F; + out += c * 2; + } else { /* add two 
deltas from table */ + t = dst[out] + (((table[c * 2] - 128)*3) >> 2); + t= clip_uint8(t); + dst[out] = t; + out++; + t = dst[out] + (((table[(c * 2) + 1] - 128)*3) >> 2); + t= clip_uint8(t); + dst[out] = t; + out++; + } + } + dst += stride; + } + return 0; +} + +static int ir2_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + Ir2Context * const s = avctx->priv_data; + AVFrame *picture = data; + AVFrame * const p= (AVFrame*)&s->picture; + int start; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference = 1; + p->buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, p)) { + av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + + s->decode_delta = buf[18]; + + /* decide whether frame uses deltas or not */ +#ifndef ALT_BITSTREAM_READER_LE + for (i = 0; i < buf_size; i++) + buf[i] = ff_reverse[buf[i]]; +#endif + start = 48; /* hardcoded for now */ + + init_get_bits(&s->gb, buf + start, buf_size - start); + + if (s->decode_delta) { /* intraframe */ + ir2_decode_plane(s, avctx->width, avctx->height, + s->picture.data[0], s->picture.linesize[0], ir2_luma_table); + /* swapped U and V */ + ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2, + s->picture.data[2], s->picture.linesize[2], ir2_luma_table); + ir2_decode_plane(s, avctx->width >> 2, avctx->height >> 2, + s->picture.data[1], s->picture.linesize[1], ir2_luma_table); + } else { /* interframe */ + ir2_decode_plane_inter(s, avctx->width, avctx->height, + s->picture.data[0], s->picture.linesize[0], ir2_luma_table); + /* swapped U and V */ + ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2, + s->picture.data[2], s->picture.linesize[2], ir2_luma_table); + ir2_decode_plane_inter(s, avctx->width >> 2, avctx->height >> 2, + s->picture.data[1], s->picture.linesize[1], ir2_luma_table); + } + + *picture= *(AVFrame*)&s->picture; + *data_size = 
sizeof(AVPicture); + + return buf_size; +} + +static int ir2_decode_init(AVCodecContext *avctx){ + Ir2Context * const ic = avctx->priv_data; + + ic->avctx = avctx; + + avctx->pix_fmt= PIX_FMT_YUV410P; + + if (!ir2_vlc.table) + init_vlc(&ir2_vlc, CODE_VLC_BITS, IR2_CODES, + &ir2_codes[0][1], 4, 2, +#ifdef ALT_BITSTREAM_READER_LE + &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE); +#else + &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC); +#endif + + return 0; +} + +AVCodec indeo2_decoder = { + "indeo2", + CODEC_TYPE_VIDEO, + CODEC_ID_INDEO2, + sizeof(Ir2Context), + ir2_decode_init, + NULL, + NULL, + ir2_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/indeo2data.h b/mpeg4/src/libavcodec/indeo2data.h new file mode 100644 index 0000000000000000000000000000000000000000..2430b53c3fd3c541a681135002ab2e6e024157ba --- /dev/null +++ b/mpeg4/src/libavcodec/indeo2data.h @@ -0,0 +1,113 @@ +#define IR2_CODES 143 +static const uint16_t ir2_codes[IR2_CODES][2] = { +#ifdef ALT_BITSTREAM_READER_LE +{0x0000, 3}, {0x0004, 3}, {0x0006, 3}, {0x0001, 5}, +{0x0009, 5}, {0x0019, 5}, {0x000D, 5}, {0x001D, 5}, +{0x0023, 6}, {0x0013, 6}, {0x0033, 6}, {0x000B, 6}, +{0x002B, 6}, {0x001B, 6}, {0x0007, 8}, {0x0087, 8}, +{0x0027, 8}, {0x00A7, 8}, {0x0067, 8}, {0x00E7, 8}, +{0x0097, 8}, {0x0057, 8}, {0x0037, 8}, {0x00B7, 8}, +{0x00F7, 8}, {0x000F, 9}, {0x008F, 9}, {0x018F, 9}, +{0x014F, 9}, {0x00CF, 9}, {0x002F, 9}, {0x012F, 9}, +{0x01AF, 9}, {0x006F, 9}, {0x00EF, 9}, {0x01EF, 9}, +{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10}, +{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10}, +{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10}, +{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10}, +{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13}, +{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13}, +{0x023F, 13}, {0x123F, 13}, {0x0A3F, 13}, {0x1A3F, 13}, +{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13}, +{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, 
{0x193F, 13}, +{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13}, +{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13}, +{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13}, +{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13}, +{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13}, +{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13}, +{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13}, +{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13}, +{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13}, +{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13}, +{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13}, +{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14}, +{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14}, +{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002, 3}, +{0x0011, 5}, {0x0005, 5}, {0x0015, 5}, {0x0003, 6}, +{0x003B, 6}, {0x0047, 8}, {0x00C7, 8}, {0x0017, 8}, +{0x00D7, 8}, {0x0077, 8}, {0x010F, 9}, {0x004F, 9}, +{0x01CF, 9}, {0x00AF, 9}, {0x016F, 9}, +#else + {0x0000, 3}, {0x0001, 3}, {0x0003, 3}, {0x0010, 5}, + {0x0012, 5}, {0x0013, 5}, {0x0016, 5}, {0x0017, 5}, + {0x0031, 6}, {0x0032, 6}, {0x0033, 6}, {0x0034, 6}, + {0x0035, 6}, {0x0036, 6}, {0x00E0, 8}, {0x00E1, 8}, + {0x00E4, 8}, {0x00E5, 8}, {0x00E6, 8}, {0x00E7, 8}, + {0x00E9, 8}, {0x00EA, 8}, {0x00EC, 8}, {0x00ED, 8}, + {0x00EF, 8}, {0x01E0, 9}, {0x01E2, 9}, {0x01E3, 9}, + {0x01E5, 9}, {0x01E6, 9}, {0x01E8, 9}, {0x01E9, 9}, + {0x01EB, 9}, {0x01EC, 9}, {0x01EE, 9}, {0x01EF, 9}, + {0x03E0, 10}, {0x03E1, 10}, {0x03E2, 10}, {0x03E3, 10}, + {0x03E4, 10}, {0x03E5, 10}, {0x03E6, 10}, {0x03E7, 10}, + {0x03E8, 10}, {0x03E9, 10}, {0x03EA, 10}, {0x03EB, 10}, + {0x03EC, 10}, {0x03ED, 10}, {0x03EE, 10}, {0x03EF, 10}, + {0x1F80, 13}, {0x1F81, 13}, {0x1F82, 13}, {0x1F83, 13}, + {0x1F84, 13}, {0x1F85, 13}, {0x1F86, 13}, {0x1F87, 13}, + {0x1F88, 13}, {0x1F89, 13}, {0x1F8A, 13}, {0x1F8B, 13}, + {0x1F8C, 13}, {0x1F8D, 13}, {0x1F8E, 13}, {0x1F8F, 13}, + {0x1F90, 13}, {0x1F91, 13}, 
{0x1F92, 13}, {0x1F93, 13}, + {0x1F94, 13}, {0x1F95, 13}, {0x1F96, 13}, {0x1F97, 13}, + {0x1F98, 13}, {0x1F99, 13}, {0x1F9A, 13}, {0x1F9B, 13}, + {0x1F9C, 13}, {0x1F9D, 13}, {0x1F9E, 13}, {0x1F9F, 13}, + {0x1FA0, 13}, {0x1FA1, 13}, {0x1FA2, 13}, {0x1FA3, 13}, + {0x1FA4, 13}, {0x1FA5, 13}, {0x1FA6, 13}, {0x1FA7, 13}, + {0x1FA8, 13}, {0x1FA9, 13}, {0x1FAA, 13}, {0x1FAB, 13}, + {0x1FAC, 13}, {0x1FAD, 13}, {0x1FAE, 13}, {0x1FAF, 13}, + {0x1FB0, 13}, {0x1FB1, 13}, {0x1FB2, 13}, {0x1FB3, 13}, + {0x1FB4, 13}, {0x1FB5, 13}, {0x1FB6, 13}, {0x1FB7, 13}, + {0x1FB8, 13}, {0x1FB9, 13}, {0x1FBA, 13}, {0x1FBB, 13}, + {0x1FBC, 13}, {0x1FBD, 13}, {0x1FBE, 13}, {0x1FBF, 13}, + {0x3F80, 14}, {0x3F81, 14}, {0x3F82, 14}, {0x3F83, 14}, + {0x3F84, 14}, {0x3F85, 14}, {0x3F86, 14}, {0x3F87, 14}, + {0x3F88, 14}, {0x3F89, 14}, {0x3F8A, 14}, {0x0002, 3}, + {0x0011, 5}, {0x0014, 5}, {0x0015, 5}, {0x0030, 6}, + {0x0037, 6}, {0x00E2, 8}, {0x00E3, 8}, {0x00E8, 8}, + {0x00EB, 8}, {0x00EE, 8}, {0x01E1, 9}, {0x01E4, 9}, + {0x01E7, 9}, {0x01EA, 9}, {0x01ED, 9} +#endif +}; + +static const uint8_t ir2_luma_table[256] = { + 0x80, 0x80, 0x84, 0x84, 0x7C, 0x7C, 0x7F, 0x85, + 0x81, 0x7B, 0x85, 0x7F, 0x7B, 0x81, 0x8C, 0x8C, + 0x74, 0x74, 0x83, 0x8D, 0x7D, 0x73, 0x8D, 0x83, + 0x73, 0x7D, 0x77, 0x89, 0x89, 0x77, 0x89, 0x77, + 0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C, + 0x6B, 0x74, 0x7C, 0x90, 0x84, 0x70, 0x90, 0x7C, + 0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98, + 0x7E, 0x68, 0x98, 0x82, 0x68, 0x7E, 0x97, 0xA2, + 0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2, + 0x5E, 0x5E, 0x8B, 0xA3, 0x75, 0x5D, 0xA3, 0x8B, + 0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71, + 0x6B, 0x8F, 0x78, 0x9D, 0x88, 0x63, 0x9D, 0x78, + 0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F, + 0x59, 0x81, 0xA4, 0xB1, 0x5C, 0x4F, 0xB1, 0xA4, + 0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96, + 0x4F, 0x6A, 0xB2, 0xB2, 0x4E, 0x4E, 0x65, 0x9B, + 0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4, + 0x77, 0x4C, 0xB4, 0x89, 0x4C, 0x77, 0x6A, 
0xA3, + 0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC, + 0x8D, 0x54, 0xAC, 0x73, 0x54, 0x8D, 0xB4, 0xC3, + 0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3, + 0x5C, 0x3D, 0xC3, 0xA4, 0x3D, 0x5C, 0xC4, 0xC4, + 0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96, + 0x3A, 0x6A, 0x7C, 0xBA, 0x84, 0x46, 0xBA, 0x7C, + 0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B, + 0x55, 0xA5, 0x63, 0xB4, 0x9D, 0x4C, 0xB4, 0x63, + 0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86, + 0x36, 0x7A, 0xB6, 0xD7, 0x4A, 0x29, 0xD7, 0xB6, + 0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8, + 0x29, 0x38, 0xA4, 0xD8, 0x5C, 0x28, 0xD8, 0xA4, + 0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C, + 0x3F, 0x94, 0xD9, 0xD9, 0x27, 0x27, 0x80, 0x80 +}; diff --git a/mpeg4/src/libavcodec/indeo3.c b/mpeg4/src/libavcodec/indeo3.c new file mode 100644 index 0000000000000000000000000000000000000000..90eb37150a04d63145905ae16d34051539704ea9 --- /dev/null +++ b/mpeg4/src/libavcodec/indeo3.c @@ -0,0 +1,1151 @@ +/* + * Intel Indeo 3 (IV31, IV32, etc.) video decoder for ffmpeg + * written, produced, and directed by Alan Smithee + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +#include "indeo3data.h" + +typedef struct +{ + unsigned char *Ybuf; + unsigned char *Ubuf; + unsigned char *Vbuf; + unsigned char *the_buf; + unsigned int the_buf_size; + unsigned short y_w, y_h; + unsigned short uv_w, uv_h; +} YUVBufs; + +typedef struct Indeo3DecodeContext { + AVCodecContext *avctx; + int width, height; + AVFrame frame; + + YUVBufs iv_frame[2]; + YUVBufs *cur_frame; + YUVBufs *ref_frame; + + unsigned char *ModPred; + unsigned short *corrector_type; +} Indeo3DecodeContext; + +static int corrector_type_0[24] = { + 195, 159, 133, 115, 101, 93, 87, 77, + 195, 159, 133, 115, 101, 93, 87, 77, + 128, 79, 79, 79, 79, 79, 79, 79 +}; + +static int corrector_type_2[8] = { 9, 7, 6, 8, 5, 4, 3, 2 }; + +static void build_modpred(Indeo3DecodeContext *s) +{ + int i, j; + + s->ModPred = (unsigned char *) av_malloc (8 * 128); + + for (i=0; i < 128; ++i) { + s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2)); + s->ModPred[i+1*128] = (i == 7) ? 20 : ((i == 119 || i == 120) + ? 236 : 2*((i + 2) - ((i + 1) % 3))); + s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4)); + s->ModPred[i+3*128] = 2*((i + 1) - ((i - 3) % 5)); + s->ModPred[i+4*128] = (i == 8) ? 20 : 2*((i + 1) - ((i - 3) % 6)); + s->ModPred[i+5*128] = 2*((i + 4) - ((i + 3) % 7)); + s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8)); + s->ModPred[i+7*128] = 2*((i + 5) - ((i + 4) % 9)); + } + + s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short)); + + for (i=0; i < 24; ++i) { + for (j=0; j < 256; ++j) { + s->corrector_type[i*256+j] = (j < corrector_type_0[i]) + ? 
1 : ((j < 248 || (i == 16 && j == 248)) + ? 0 : corrector_type_2[j - 248]); + } + } +} + +static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur, + unsigned char *ref, int width, int height, unsigned char *buf1, + long fflags2, unsigned char *hdr, + unsigned char *buf2, int min_width_160); + +#ifndef min +#define min(a,b) ((a) < (b) ? (a) : (b)) +#endif + +/* ---------------------------------------------------------------------- */ +static void iv_alloc_frames(Indeo3DecodeContext *s) +{ + int luma_width, luma_height, luma_pixels, chroma_width, chroma_height, + chroma_pixels, i; + unsigned int bufsize; + + luma_width = (s->width + 3) & (~3); + luma_height = (s->height + 3) & (~3); + + s->iv_frame[0].y_w = s->iv_frame[0].y_h = + s->iv_frame[0].the_buf_size = 0; + s->iv_frame[1].y_w = s->iv_frame[1].y_h = + s->iv_frame[1].the_buf_size = 0; + s->iv_frame[1].the_buf = NULL; + + chroma_width = ((luma_width >> 2) + 3) & (~3); + chroma_height = ((luma_height>> 2) + 3) & (~3); + luma_pixels = luma_width * luma_height; + chroma_pixels = chroma_width * chroma_height; + + bufsize = luma_pixels * 2 + luma_width * 3 + + (chroma_pixels + chroma_width) * 4; + + if((s->iv_frame[0].the_buf = + (s->iv_frame[0].the_buf_size == 0 ? 
av_malloc(bufsize) : + av_realloc(s->iv_frame[0].the_buf, bufsize))) == NULL) + return; + s->iv_frame[0].y_w = s->iv_frame[1].y_w = luma_width; + s->iv_frame[0].y_h = s->iv_frame[1].y_h = luma_height; + s->iv_frame[0].uv_w = s->iv_frame[1].uv_w = chroma_width; + s->iv_frame[0].uv_h = s->iv_frame[1].uv_h = chroma_height; + s->iv_frame[0].the_buf_size = bufsize; + + s->iv_frame[0].Ybuf = s->iv_frame[0].the_buf + luma_width; + i = luma_pixels + luma_width * 2; + s->iv_frame[1].Ybuf = s->iv_frame[0].the_buf + i; + i += (luma_pixels + luma_width); + s->iv_frame[0].Ubuf = s->iv_frame[0].the_buf + i; + i += (chroma_pixels + chroma_width); + s->iv_frame[1].Ubuf = s->iv_frame[0].the_buf + i; + i += (chroma_pixels + chroma_width); + s->iv_frame[0].Vbuf = s->iv_frame[0].the_buf + i; + i += (chroma_pixels + chroma_width); + s->iv_frame[1].Vbuf = s->iv_frame[0].the_buf + i; + + for(i = 1; i <= luma_width; i++) + s->iv_frame[0].Ybuf[-i] = s->iv_frame[1].Ybuf[-i] = + s->iv_frame[0].Ubuf[-i] = 0x80; + + for(i = 1; i <= chroma_width; i++) { + s->iv_frame[1].Ubuf[-i] = 0x80; + s->iv_frame[0].Vbuf[-i] = 0x80; + s->iv_frame[1].Vbuf[-i] = 0x80; + s->iv_frame[1].Vbuf[chroma_pixels+i-1] = 0x80; + } +} + +/* ---------------------------------------------------------------------- */ +static void iv_free_func(Indeo3DecodeContext *s) +{ + int i; + + for(i = 0 ; i < 2 ; i++) { + if(s->iv_frame[i].the_buf != NULL) + av_free(s->iv_frame[i].the_buf); + s->iv_frame[i].Ybuf = s->iv_frame[i].Ubuf = + s->iv_frame[i].Vbuf = NULL; + s->iv_frame[i].the_buf = NULL; + s->iv_frame[i].the_buf_size = 0; + s->iv_frame[i].y_w = s->iv_frame[i].y_h = 0; + s->iv_frame[i].uv_w = s->iv_frame[i].uv_h = 0; + } + + av_free(s->ModPred); + av_free(s->corrector_type); +} + +/* ---------------------------------------------------------------------- */ +static unsigned long iv_decode_frame(Indeo3DecodeContext *s, + unsigned char *buf, int buf_size) +{ + unsigned int hdr_width, hdr_height, + chroma_width, chroma_height; + 
unsigned long fflags1, fflags2, fflags3, offs1, offs2, offs3, offs; + unsigned char *hdr_pos, *buf_pos; + + buf_pos = buf; + buf_pos += 18; + + fflags1 = le2me_16(*(uint16_t *)buf_pos); + buf_pos += 2; + fflags3 = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + fflags2 = *buf_pos++; + buf_pos += 3; + hdr_height = le2me_16(*(uint16_t *)buf_pos); + buf_pos += 2; + hdr_width = le2me_16(*(uint16_t *)buf_pos); + + if(avcodec_check_dimensions(NULL, hdr_width, hdr_height)) + return -1; + + buf_pos += 2; + chroma_height = ((hdr_height >> 2) + 3) & 0x7ffc; + chroma_width = ((hdr_width >> 2) + 3) & 0x7ffc; + offs1 = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + offs2 = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + offs3 = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 8; + hdr_pos = buf_pos; + if(fflags3 == 0x80) return 4; + + if(fflags1 & 0x200) { + s->cur_frame = s->iv_frame + 1; + s->ref_frame = s->iv_frame; + } else { + s->cur_frame = s->iv_frame; + s->ref_frame = s->iv_frame + 1; + } + + buf_pos = buf + 16 + offs1; + offs = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + + iv_Decode_Chunk(s, s->cur_frame->Ybuf, s->ref_frame->Ybuf, hdr_width, + hdr_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, + min(hdr_width, 160)); + + if (!(s->avctx->flags & CODEC_FLAG_GRAY)) + { + + buf_pos = buf + 16 + offs2; + offs = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + + iv_Decode_Chunk(s, s->cur_frame->Vbuf, s->ref_frame->Vbuf, chroma_width, + chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, + min(chroma_width, 40)); + + buf_pos = buf + 16 + offs3; + offs = le2me_32(*(uint32_t *)buf_pos); + buf_pos += 4; + + iv_Decode_Chunk(s, s->cur_frame->Ubuf, s->ref_frame->Ubuf, chroma_width, + chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, + min(chroma_width, 40)); + + } + + return 8; +} + +typedef struct { + long xpos; + long ypos; + long width; + long height; + long split_flag; + long split_direction; + long usl7; +} ustr_t; + +/* 
---------------------------------------------------------------------- */ + +#define LV1_CHECK(buf1,rle_v3,lv1,lp2) \ + if((lv1 & 0x80) != 0) { \ + if(rle_v3 != 0) \ + rle_v3 = 0; \ + else { \ + rle_v3 = 1; \ + buf1 -= 2; \ + } \ + } \ + lp2 = 4; + + +#define RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) \ + if(rle_v3 == 0) { \ + rle_v2 = *buf1; \ + rle_v1 = 1; \ + if(rle_v2 > 32) { \ + rle_v2 -= 32; \ + rle_v1 = 0; \ + } \ + rle_v3 = 1; \ + } \ + buf1--; + + +#define LP2_CHECK(buf1,rle_v3,lp2) \ + if(lp2 == 0 && rle_v3 != 0) \ + rle_v3 = 0; \ + else { \ + buf1--; \ + rle_v3 = 1; \ + } + + +#define RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) \ + rle_v2--; \ + if(rle_v2 == 0) { \ + rle_v3 = 0; \ + buf1 += 2; \ + } \ + lp2 = 4; + +static void iv_Decode_Chunk(Indeo3DecodeContext *s, + unsigned char *cur, unsigned char *ref, int width, int height, + unsigned char *buf1, long fflags2, unsigned char *hdr, + unsigned char *buf2, int min_width_160) +{ + unsigned char bit_buf; + unsigned long bit_pos, lv, lv1, lv2; + long *width_tbl, width_tbl_arr[10]; + signed char *ref_vectors; + unsigned char *cur_frm_pos, *ref_frm_pos, *cp, *cp2; + uint32_t *cur_lp, *ref_lp; + const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2]; + unsigned short *correction_type_sp[2]; + ustr_t strip_tbl[20], *strip; + int i, j, k, lp1, lp2, flag1, cmd, blks_width, blks_height, region_160_width, + rle_v1, rle_v2, rle_v3; + unsigned short res; + + bit_buf = 0; + ref_vectors = NULL; + + width_tbl = width_tbl_arr + 1; + i = (width < 0 ? 
width + 3 : width)/4; + for(j = -1; j < 8; j++) + width_tbl[j] = i * j; + + strip = strip_tbl; + + for(region_160_width = 0; region_160_width < (width - min_width_160); region_160_width += min_width_160); + + strip->ypos = strip->xpos = 0; + for(strip->width = min_width_160; width > strip->width; strip->width *= 2); + strip->height = height; + strip->split_direction = 0; + strip->split_flag = 0; + strip->usl7 = 0; + + bit_pos = 0; + + rle_v1 = rle_v2 = rle_v3 = 0; + + while(strip >= strip_tbl) { + if(bit_pos <= 0) { + bit_pos = 8; + bit_buf = *buf1++; + } + + bit_pos -= 2; + cmd = (bit_buf >> bit_pos) & 0x03; + + if(cmd == 0) { + strip++; + memcpy(strip, strip-1, sizeof(ustr_t)); + strip->split_flag = 1; + strip->split_direction = 0; + strip->height = (strip->height > 8 ? ((strip->height+8)>>4)<<3 : 4); + continue; + } else if(cmd == 1) { + strip++; + memcpy(strip, strip-1, sizeof(ustr_t)); + strip->split_flag = 1; + strip->split_direction = 1; + strip->width = (strip->width > 8 ? ((strip->width+8)>>4)<<3 : 4); + continue; + } else if(cmd == 2) { + if(strip->usl7 == 0) { + strip->usl7 = 1; + ref_vectors = NULL; + continue; + } + } else if(cmd == 3) { + if(strip->usl7 == 0) { + strip->usl7 = 1; + ref_vectors = buf2 + (*buf1 * 2); + buf1++; + continue; + } + } + + cur_frm_pos = cur + width * strip->ypos + strip->xpos; + + if((blks_width = strip->width) < 0) + blks_width += 3; + blks_width >>= 2; + blks_height = strip->height; + + if(ref_vectors != NULL) { + ref_frm_pos = ref + (ref_vectors[0] + strip->ypos) * width + + ref_vectors[1] + strip->xpos; + } else + ref_frm_pos = cur_frm_pos - width_tbl[4]; + + if(cmd == 2) { + if(bit_pos <= 0) { + bit_pos = 8; + bit_buf = *buf1++; + } + + bit_pos -= 2; + cmd = (bit_buf >> bit_pos) & 0x03; + + if(cmd == 0 || ref_vectors != NULL) { + for(lp1 = 0; lp1 < blks_width; lp1++) { + for(i = 0, j = 0; i < blks_height; i++, j += width_tbl[1]) + ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)ref_frm_pos)[j]; + cur_frm_pos += 4; + 
ref_frm_pos += 4; + } + } else if(cmd != 1) + return; + } else { + k = *buf1 >> 4; + j = *buf1 & 0x0f; + buf1++; + lv = j + fflags2; + + if((lv - 8) <= 7 && (k == 0 || k == 3 || k == 10)) { + cp2 = s->ModPred + ((lv - 8) << 7); + cp = ref_frm_pos; + for(i = 0; i < blks_width << 2; i++) { + int v = *cp >> 1; + *(cp++) = cp2[v]; + } + } + + if(k == 1 || k == 4) { + lv = (hdr[j] & 0xf) + fflags2; + correction_type_sp[0] = s->corrector_type + (lv << 8); + correction_lp[0] = correction + (lv << 8); + lv = (hdr[j] >> 4) + fflags2; + correction_lp[1] = correction + (lv << 8); + correction_type_sp[1] = s->corrector_type + (lv << 8); + } else { + correctionloworder_lp[0] = correctionloworder_lp[1] = correctionloworder + (lv << 8); + correctionhighorder_lp[0] = correctionhighorder_lp[1] = correctionhighorder + (lv << 8); + correction_type_sp[0] = correction_type_sp[1] = s->corrector_type + (lv << 8); + correction_lp[0] = correction_lp[1] = correction + (lv << 8); + } + + switch(k) { + case 1: + case 0: /********** CASE 0 **********/ + for( ; blks_height > 0; blks_height -= 4) { + for(lp1 = 0; lp1 < blks_width; lp1++) { + for(lp2 = 0; lp2 < 4; ) { + k = *buf1++; + cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2]; + ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2]; + + switch(correction_type_sp[0][k]) { + case 0: + *cur_lp = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + lp2++; + break; + case 1: + res = ((le2me_16(((unsigned short *)(ref_lp))[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1; + ((unsigned short *)cur_lp)[0] = le2me_16(res); + res = ((le2me_16(((unsigned short *)(ref_lp))[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + ((unsigned short *)cur_lp)[1] = le2me_16(res); + buf1++; + lp2++; + break; + case 2: + if(lp2 == 0) { + for(i = 0, j = 0; i < 2; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + lp2 += 2; + } + break; + case 3: + if(lp2 < 2) { + for(i = 0, j = 0; i < (3 - lp2); i++, j += width_tbl[1]) + cur_lp[j] = 
ref_lp[j]; + lp2 = 3; + } + break; + case 8: + if(lp2 == 0) { + RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) + + if(rle_v1 == 1 || ref_vectors != NULL) { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + } + + RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) + break; + } else { + rle_v1 = 1; + rle_v2 = *buf1 - 1; + } + case 5: + LP2_CHECK(buf1,rle_v3,lp2) + case 4: + for(i = 0, j = 0; i < (4 - lp2); i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + lp2 = 4; + break; + + case 7: + if(rle_v3 != 0) + rle_v3 = 0; + else { + buf1--; + rle_v3 = 1; + } + case 6: + if(ref_vectors != NULL) { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + } + lp2 = 4; + break; + + case 9: + lv1 = *buf1++; + lv = (lv1 & 0x7F) << 1; + lv += (lv << 8); + lv += (lv << 16); + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = lv; + + LV1_CHECK(buf1,rle_v3,lv1,lp2) + break; + default: + return; + } + } + + cur_frm_pos += 4; + ref_frm_pos += 4; + } + + cur_frm_pos += ((width - blks_width) * 4); + ref_frm_pos += ((width - blks_width) * 4); + } + break; + + case 4: + case 3: /********** CASE 3 **********/ + if(ref_vectors != NULL) + return; + flag1 = 1; + + for( ; blks_height > 0; blks_height -= 8) { + for(lp1 = 0; lp1 < blks_width; lp1++) { + for(lp2 = 0; lp2 < 4; ) { + k = *buf1++; + + cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2]; + ref_lp = ((uint32_t *)cur_frm_pos) + width_tbl[(lp2 * 2) - 1]; + + switch(correction_type_sp[lp2 & 0x01][k]) { + case 0: + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + if(lp2 > 0 || flag1 == 0 || strip->ypos != 0) + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + else + cur_lp[0] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + lp2++; + break; + + case 1: + res = ((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1; + ((unsigned short 
*)cur_lp)[width_tbl[2]] = le2me_16(res); + res = ((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + ((unsigned short *)cur_lp)[width_tbl[2]+1] = le2me_16(res); + + if(lp2 > 0 || flag1 == 0 || strip->ypos != 0) + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + else + cur_lp[0] = cur_lp[width_tbl[1]]; + buf1++; + lp2++; + break; + + case 2: + if(lp2 == 0) { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = *ref_lp; + lp2 += 2; + } + break; + + case 3: + if(lp2 < 2) { + for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) + cur_lp[j] = *ref_lp; + lp2 = 3; + } + break; + + case 6: + lp2 = 4; + break; + + case 7: + if(rle_v3 != 0) + rle_v3 = 0; + else { + buf1--; + rle_v3 = 1; + } + lp2 = 4; + break; + + case 8: + if(lp2 == 0) { + RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) + + if(rle_v1 == 1) { + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + } + + RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) + break; + } else { + rle_v2 = (*buf1) - 1; + rle_v1 = 1; + } + case 5: + LP2_CHECK(buf1,rle_v3,lp2) + case 4: + for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) + cur_lp[j] = *ref_lp; + lp2 = 4; + break; + + case 9: + av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n"); + lv1 = *buf1++; + lv = (lv1 & 0x7F) << 1; + lv += (lv << 8); + lv += (lv << 16); + + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = lv; + + LV1_CHECK(buf1,rle_v3,lv1,lp2) + break; + + default: + return; + } + } + + cur_frm_pos += 4; + } + + cur_frm_pos += (((width * 2) - blks_width) * 4); + flag1 = 0; + } + break; + + case 10: /********** CASE 10 **********/ + if(ref_vectors == NULL) { + flag1 = 1; + + for( ; blks_height > 0; blks_height -= 8) { + for(lp1 = 0; lp1 < blks_width; lp1 += 2) { + for(lp2 = 0; lp2 < 4; ) { + k = *buf1++; + cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2]; + ref_lp = ((uint32_t *)cur_frm_pos) + width_tbl[(lp2 * 2) - 1]; + lv1 = ref_lp[0]; + 
lv2 = ref_lp[1]; + if(lp2 == 0 && flag1 != 0) { +#ifdef WORDS_BIGENDIAN + lv1 = lv1 & 0xFF00FF00; + lv1 = (lv1 >> 8) | lv1; + lv2 = lv2 & 0xFF00FF00; + lv2 = (lv2 >> 8) | lv2; +#else + lv1 = lv1 & 0x00FF00FF; + lv1 = (lv1 << 8) | lv1; + lv2 = lv2 & 0x00FF00FF; + lv2 = (lv2 << 8) | lv2; +#endif + } + + switch(correction_type_sp[lp2 & 0x01][k]) { + case 0: + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionhighorder_lp[lp2 & 0x01][k]) << 1); + if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) { + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + cur_lp[0] = cur_lp[width_tbl[1]]; + cur_lp[1] = cur_lp[width_tbl[1]+1]; + } + lp2++; + break; + + case 1: + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][*buf1]) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1); + if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) { + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + cur_lp[0] = cur_lp[width_tbl[1]]; + cur_lp[1] = cur_lp[width_tbl[1]+1]; + } + buf1++; + lp2++; + break; + + case 2: + if(lp2 == 0) { + if(flag1 != 0) { + for(i = 0, j = width_tbl[1]; i < 3; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + } + lp2 += 2; + } + break; + + case 3: + if(lp2 < 2) { + if(lp2 == 0 && 
flag1 != 0) { + for(i = 0, j = width_tbl[1]; i < 5; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + } + lp2 = 3; + } + break; + + case 8: + if(lp2 == 0) { + RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) + if(rle_v1 == 1) { + if(flag1 != 0) { + for(i = 0, j = width_tbl[1]; i < 7; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + } + } + RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) + break; + } else { + rle_v1 = 1; + rle_v2 = (*buf1) - 1; + } + case 5: + LP2_CHECK(buf1,rle_v3,lp2) + case 4: + if(lp2 == 0 && flag1 != 0) { + for(i = 0, j = width_tbl[1]; i < 7; i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; + cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; + } else { + for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) { + cur_lp[j] = lv1; + cur_lp[j+1] = lv2; + } + } + lp2 = 4; + break; + + case 6: + lp2 = 4; + break; + + case 7: + if(lp2 == 0) { + if(rle_v3 != 0) + rle_v3 = 0; + else { + buf1--; + rle_v3 = 1; + } + lp2 = 4; + } + break; + + case 9: + av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n"); + lv1 = *buf1; + lv = (lv1 & 0x7F) << 1; + lv += (lv << 8); + lv += (lv << 16); + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) + cur_lp[j] = lv; + LV1_CHECK(buf1,rle_v3,lv1,lp2) + break; + + 
default: + return; + } + } + + cur_frm_pos += 8; + } + + cur_frm_pos += (((width * 2) - blks_width) * 4); + flag1 = 0; + } + } else { + for( ; blks_height > 0; blks_height -= 8) { + for(lp1 = 0; lp1 < blks_width; lp1 += 2) { + for(lp2 = 0; lp2 < 4; ) { + k = *buf1++; + cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2]; + ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2 * 2]; + + switch(correction_type_sp[lp2 & 0x01][k]) { + case 0: + lv1 = correctionloworder_lp[lp2 & 0x01][k]; + lv2 = correctionhighorder_lp[lp2 & 0x01][k]; + cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1); + cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1); + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1); + lp2++; + break; + + case 1: + lv1 = correctionloworder_lp[lp2 & 0x01][*buf1++]; + lv2 = correctionloworder_lp[lp2 & 0x01][k]; + cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1); + cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1); + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1); + lp2++; + break; + + case 2: + if(lp2 == 0) { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) { + cur_lp[j] = ref_lp[j]; + cur_lp[j+1] = ref_lp[j+1]; + } + lp2 += 2; + } + break; + + case 3: + if(lp2 < 2) { + for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) { + cur_lp[j] = ref_lp[j]; + cur_lp[j+1] = ref_lp[j+1]; + } + lp2 = 3; + } + break; + + case 8: + if(lp2 == 0) { + RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) { + ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)ref_frm_pos)[j]; + ((uint32_t *)cur_frm_pos)[j+1] = ((uint32_t *)ref_frm_pos)[j+1]; + } + RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) + break; + } else { + rle_v1 = 1; + rle_v2 = (*buf1) - 1; + } 
+ case 5: + case 7: + LP2_CHECK(buf1,rle_v3,lp2) + case 6: + case 4: + for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) { + cur_lp[j] = ref_lp[j]; + cur_lp[j+1] = ref_lp[j+1]; + } + lp2 = 4; + break; + + case 9: + av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n"); + lv1 = *buf1; + lv = (lv1 & 0x7F) << 1; + lv += (lv << 8); + lv += (lv << 16); + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) + ((uint32_t *)cur_frm_pos)[j] = ((uint32_t *)cur_frm_pos)[j+1] = lv; + LV1_CHECK(buf1,rle_v3,lv1,lp2) + break; + + default: + return; + } + } + + cur_frm_pos += 8; + ref_frm_pos += 8; + } + + cur_frm_pos += (((width * 2) - blks_width) * 4); + ref_frm_pos += (((width * 2) - blks_width) * 4); + } + } + break; + + case 11: /********** CASE 11 **********/ + if(ref_vectors == NULL) + return; + + for( ; blks_height > 0; blks_height -= 8) { + for(lp1 = 0; lp1 < blks_width; lp1++) { + for(lp2 = 0; lp2 < 4; ) { + k = *buf1++; + cur_lp = ((uint32_t *)cur_frm_pos) + width_tbl[lp2 * 2]; + ref_lp = ((uint32_t *)ref_frm_pos) + width_tbl[lp2 * 2]; + + switch(correction_type_sp[lp2 & 0x01][k]) { + case 0: + cur_lp[0] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + lp2++; + break; + + case 1: + lv1 = (unsigned short)(correction_lp[lp2 & 0x01][*buf1++]); + lv2 = (unsigned short)(correction_lp[lp2 & 0x01][k]); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + lv1) << 1); + ((unsigned short *)cur_lp)[0] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + lv2) << 1); + ((unsigned short *)cur_lp)[1] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]]) >> 1) + lv1) << 1); + ((unsigned short *)cur_lp)[width_tbl[2]] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]+1]) >> 1) + lv2) << 1); + ((unsigned 
short *)cur_lp)[width_tbl[2]+1] = le2me_16(res); + lp2++; + break; + + case 2: + if(lp2 == 0) { + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + lp2 += 2; + } + break; + + case 3: + if(lp2 < 2) { + for(i = 0, j = 0; i < 6 - (lp2 * 2); i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + lp2 = 3; + } + break; + + case 8: + if(lp2 == 0) { + RLE_V3_CHECK(buf1,rle_v1,rle_v2,rle_v3) + + for(i = 0, j = 0; i < 8; i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + + RLE_V2_CHECK(buf1,rle_v2, rle_v3,lp2) + break; + } else { + rle_v1 = 1; + rle_v2 = (*buf1) - 1; + } + case 5: + case 7: + LP2_CHECK(buf1,rle_v3,lp2) + case 4: + case 6: + for(i = 0, j = 0; i < 8 - (lp2 * 2); i++, j += width_tbl[1]) + cur_lp[j] = ref_lp[j]; + lp2 = 4; + break; + + case 9: + av_log(s->avctx, AV_LOG_ERROR, "UNTESTED.\n"); + lv1 = *buf1++; + lv = (lv1 & 0x7F) << 1; + lv += (lv << 8); + lv += (lv << 16); + for(i = 0, j = 0; i < 4; i++, j += width_tbl[1]) + cur_lp[j] = lv; + LV1_CHECK(buf1,rle_v3,lv1,lp2) + break; + + default: + return; + } + } + + cur_frm_pos += 4; + ref_frm_pos += 4; + } + + cur_frm_pos += (((width * 2) - blks_width) * 4); + ref_frm_pos += (((width * 2) - blks_width) * 4); + } + break; + + default: + return; + } + } + + if(strip < strip_tbl) + return; + + for( ; strip >= strip_tbl; strip--) { + if(strip->split_flag != 0) { + strip->split_flag = 0; + strip->usl7 = (strip-1)->usl7; + + if(strip->split_direction) { + strip->xpos += strip->width; + strip->width = (strip-1)->width - strip->width; + if(region_160_width <= strip->xpos && width < strip->width + strip->xpos) + strip->width = width - strip->xpos; + } else { + strip->ypos += strip->height; + strip->height = (strip-1)->height - strip->height; + } + break; + } + } + } +} + /* Decoder init callback: stores avctx and its width/height in the private Indeo3DecodeContext, sets pix_fmt to PIX_FMT_YUV410P and has_b_frames to 0, then calls build_modpred() and iv_alloc_frames(). Always returns 0. */ +static int indeo3_decode_init(AVCodecContext *avctx) +{ + Indeo3DecodeContext *s = avctx->priv_data; + + s->avctx = avctx; + s->width = avctx->width; + s->height = avctx->height; + avctx->pix_fmt = PIX_FMT_YUV410P; + 
avctx->has_b_frames = 0; + /* NOTE(review): both helpers are defined outside this view; presumably they build prediction tables and allocate the frame buffers. No result from either call is checked here -- confirm they cannot fail. */ + build_modpred(s); + iv_alloc_frames(s); + + return 0; +} + /* Decode callback: runs iv_decode_frame() on buf/buf_size, obtains an output picture through avctx->get_buffer(), and copies the planes of s->cur_frame into it row by row. Writes sizeof(AVFrame) to *data_size and the picture to *data. Returns buf_size, or -1 when get_buffer() fails. */ +static int indeo3_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + unsigned char *buf, int buf_size) +{ + Indeo3DecodeContext *s=avctx->priv_data; + unsigned char *src, *dest; + int y; + /* Whatever iv_decode_frame() reports is not inspected; the copy below always runs. */ + iv_decode_frame(s, buf, buf_size); + /* Release any buffer still attached to s->frame before requesting a new one. */ + if(s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + s->frame.reference = 0; + if(avctx->get_buffer(avctx, &s->frame) < 0) { + av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + /* Y plane: s->height rows of y_w bytes; source rows advance by y_w, destination rows by linesize[0]. */ + src = s->cur_frame->Ybuf; + dest = s->frame.data[0]; + for (y = 0; y < s->height; y++) { + memcpy(dest, src, s->cur_frame->y_w); + src += s->cur_frame->y_w; + dest += s->frame.linesize[0]; + } + /* U and V planes are skipped when CODEC_FLAG_GRAY is set; each copies s->height / 4 rows of uv_w bytes. */ + if (!(s->avctx->flags & CODEC_FLAG_GRAY)) + { + src = s->cur_frame->Ubuf; + dest = s->frame.data[1]; + for (y = 0; y < s->height / 4; y++) { + memcpy(dest, src, s->cur_frame->uv_w); + src += s->cur_frame->uv_w; + dest += s->frame.linesize[1]; + } + + src = s->cur_frame->Vbuf; + dest = s->frame.data[2]; + for (y = 0; y < s->height / 4; y++) { + memcpy(dest, src, s->cur_frame->uv_w); + src += s->cur_frame->uv_w; + dest += s->frame.linesize[2]; + } + } + /* Hand the picture to the caller by value. */ + *data_size=sizeof(AVFrame); + *(AVFrame*)data= s->frame; + + return buf_size; +} + /* Decoder close callback: calls iv_free_func() on the private context and returns 0. */ +static int indeo3_decode_end(AVCodecContext *avctx) +{ + Indeo3DecodeContext *s = avctx->priv_data; + + iv_free_func(s); + + return 0; +} + /* Codec table entry, initialized positionally: codec name, CODEC_TYPE_VIDEO, CODEC_ID_INDEO3, private context size, then the init, end and decode callbacks defined above. NOTE(review): the NULL between init and end is presumably the unused encode slot, and the trailing 0 / NULL are further AVCodec fields -- confirm against the AVCodec declaration. */ +AVCodec indeo3_decoder = { + "indeo3", + CODEC_TYPE_VIDEO, + CODEC_ID_INDEO3, + sizeof(Indeo3DecodeContext), + indeo3_decode_init, + NULL, + indeo3_decode_end, + indeo3_decode_frame, + 0, + NULL +}; diff --git a/mpeg4/src/libavcodec/indeo3data.h b/mpeg4/src/libavcodec/indeo3data.h new file mode 100644 index 0000000000000000000000000000000000000000..77bbc07ba5162014892cfa15fe24b4ea7d30f17e --- /dev/null +++ b/mpeg4/src/libavcodec/indeo3data.h @@ -0,0 +1,2315 @@ + +static const uint32_t correction[] = { + 0x00000000, 0x00000202, 0xfffffdfe, 0x000002ff, 
0xfffffd01, 0xffffff03, 0x000000fd, 0x00000404, + 0xfffffbfc, 0x00000501, 0xfffffaff, 0x00000105, 0xfffffefb, 0x000003fc, 0xfffffc04, 0x000005fe, + 0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000904, 0xfffff6fc, 0x00000409, 0xfffffbf7, 0x00000909, + 0xfffff6f7, 0x00000a01, 0xfffff5ff, 0x0000010a, 0xfffffef6, 0x000007fb, 0xfffff805, 0xfffffb08, + 0x000004f8, 0x00000f09, 0xfffff0f7, 0x0000090f, 0xfffff6f1, 0x00000bfd, 0xfffff403, 0xfffffd0c, + 0x000002f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200, + 0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff7, 0xfffff009, 0xfffff710, + 0x000008f0, 0x00001b0b, 0xffffe4f5, 0x00000b1b, 0xfffff4e5, 0x00001c13, 0xffffe3ed, 0x0000131c, + 0xffffece4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001d04, 0xffffe2fc, 0x0000041d, + 0xfffffbe3, 0x00001e1e, 0xffffe1e2, 0x000020fe, 0xffffdf02, 0xfffffe21, 0x000001df, 0x000016ee, + 0xffffe912, 0xffffee17, 0x000011e9, 0x00001df1, 0xffffe20f, 0xfffff11e, 0x00000ee2, 0x00002e16, + 0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003123, + 0xffffcedd, 0x00002331, 0xffffdccf, 0x000028f5, 0xffffd70b, 0xfffff529, 0x00000ad7, 0x00003304, + 0xffffccfc, 0x00000433, 0xfffffbcd, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e3, + 0xffffd61d, 0xffffe32a, 0x00001cd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1b, + 0xffffb3e5, 0x00001b4c, 0xffffe4b4, 0x00004d2b, 0xffffb2d5, 0x00002b4d, 0xffffd4b3, 0x000036e8, + 0xffffc918, 0xffffe837, 0x000017c9, 0x00004f0e, 0xffffb0f2, 0x00000e4f, 0xfffff1b1, 0x0000533f, + 0xffffacc1, 0x00003f53, 0xffffc0ad, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802, + 0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005d5d, 0xffffa2a3, 0x00003ccc, 0xffffc334, 0xffffcc3d, + 0x000033c3, 0x00007834, 0xffff87cc, 0x00003478, 0xffffcb88, 0x00004ad3, 0xffffb52d, 0xffffd34b, + 0x00002cb5, 0x00007d4b, 0xffff82b5, 0x00004b7d, 0xffffb483, 0x00007a21, 0xffff85df, 
0x0000217a, + 0xffffde86, 0x000066f3, 0xffff990d, 0xfffff367, 0x00000c99, 0x00005fd8, 0xffffa028, 0xffffd860, + 0x000027a0, 0x00007ede, 0xffff8122, 0xffffde7f, 0x00002181, 0x000058a7, 0xffffa759, 0x000068b2, + 0xffff974e, 0xffffb269, 0x00004d97, 0x00000c0c, 0xfffff3f4, 0x00001717, 0xffffe8e9, 0x00002a2a, + 0xffffd5d6, 0x00004949, 0xffffb6b7, 0x00000000, 0x02020000, 0xfdfe0000, 0x02ff0000, 0xfd010000, + 0xff030000, 0x00fd0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02ff0202, 0xfd010202, 0xff030202, + 0x00fd0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x02fefdfe, 0xfd00fdfe, 0xff02fdfe, 0x00fcfdfe, + 0x000002ff, 0x020202ff, 0xfdfe02ff, 0x02ff02ff, 0xfd0102ff, 0xff0302ff, 0x00fd02ff, 0xfffffd01, + 0x0201fd01, 0xfdfdfd01, 0x02fefd01, 0xfd00fd01, 0xff02fd01, 0x00fcfd01, 0xffffff03, 0x0201ff03, + 0xfdfdff03, 0x02feff03, 0xfd00ff03, 0xff02ff03, 0x00fcff03, 0x000000fd, 0x020200fd, 0xfdfe00fd, + 0x02ff00fd, 0xfd0100fd, 0xff0300fd, 0x00fd00fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000303, 0xfffffcfd, 0x000003ff, 0xfffffc01, 0xffffff04, 0x000000fc, 0x00000707, + 0xfffff8f9, 0x00000802, 0xfffff7fe, 0x00000208, 0xfffffdf8, 0x000008fe, 0xfffff702, 0xfffffe09, + 0x000001f7, 0x000005fa, 0xfffffa06, 0x00000d06, 0xfffff2fa, 0x0000060d, 0xfffff9f3, 0x00000d0d, + 0xfffff2f3, 0x00000e01, 0xfffff1ff, 0x0000010e, 0xfffffef2, 0x00000bf8, 0xfffff408, 0xfffff80c, + 0x000007f4, 0x0000170e, 0xffffe8f2, 0x00000e17, 0xfffff1e9, 0x000011fb, 0xffffee05, 0xfffffb12, + 0x000004ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001aff, + 0xffffe501, 0xffffff1b, 0x000000e5, 0x000010ef, 0xffffef11, 0x000016f3, 0xffffe90d, 0xfffff317, + 0x00000ce9, 0x00002810, 0xffffd7f0, 0x00001028, 0xffffefd8, 0x0000291c, 0xffffd6e4, 0x00001c29, + 0xffffe3d7, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002b06, 0xffffd4fa, 0x0000062b, + 0xfffff9d5, 0x00002e2e, 
0xffffd1d2, 0x000031fc, 0xffffce04, 0xfffffc32, 0x000003ce, 0x000021e5, + 0xffffde1b, 0xffffe522, 0x00001ade, 0x00002cea, 0xffffd316, 0xffffea2d, 0x000015d3, 0x00004522, + 0xffffbade, 0x00002245, 0xffffddbb, 0x00004613, 0xffffb9ed, 0x00001346, 0xffffecba, 0x00004935, + 0xffffb6cb, 0x00003549, 0xffffcab7, 0x00003def, 0xffffc211, 0xffffef3e, 0x000010c2, 0x00004d05, + 0xffffb2fb, 0x0000054d, 0xfffffab3, 0x00005252, 0xffffadae, 0x000032cd, 0xffffcd33, 0x00003fd5, + 0xffffc02b, 0xffffd540, 0x00002ac0, 0x000059f6, 0xffffa60a, 0xfffff65a, 0x000009a6, 0x00007229, + 0xffff8dd7, 0x00002972, 0xffffd68e, 0x00007440, 0xffff8bc0, 0x00004074, 0xffffbf8c, 0x000051db, + 0xffffae25, 0xffffdb52, 0x000024ae, 0x00007716, 0xffff88ea, 0x00001677, 0xffffe989, 0x00007c5f, + 0xffff83a1, 0x00005f7c, 0xffffa084, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005bb2, + 0xffffa44e, 0xffffb25c, 0x00004da4, 0x000070bc, 0xffff8f44, 0xffffbc71, 0x0000438f, 0x00001212, + 0xffffedee, 0x00002222, 0xffffddde, 0x00003f3f, 0xffffc0c1, 0x00006d6d, 0xffff9293, 0x00000000, + 0x03030000, 0xfcfd0000, 0x03ff0000, 0xfc010000, 0xff040000, 0x00fc0000, 0x07070000, 0xf8f90000, + 0x00000303, 0x03030303, 0xfcfd0303, 0x03ff0303, 0xfc010303, 0xff040303, 0x00fc0303, 0x07070303, + 0xf8f90303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x03fefcfd, 0xfc00fcfd, 0xff03fcfd, 0x00fbfcfd, + 0x0706fcfd, 0xf8f8fcfd, 0x000003ff, 0x030303ff, 0xfcfd03ff, 0x03ff03ff, 0xfc0103ff, 0xff0403ff, + 0x00fc03ff, 0x070703ff, 0xf8f903ff, 0xfffffc01, 0x0302fc01, 0xfcfcfc01, 0x03fefc01, 0xfc00fc01, + 0xff03fc01, 0x00fbfc01, 0x0706fc01, 0xf8f8fc01, 0xffffff04, 0x0302ff04, 0xfcfcff04, 0x03feff04, + 0xfc00ff04, 0xff03ff04, 0x00fbff04, 0x0706ff04, 0xf8f8ff04, 0x000000fc, 0x030300fc, 0xfcfd00fc, + 0x03ff00fc, 0xfc0100fc, 0xff0400fc, 0x00fc00fc, 0x070700fc, 0xf8f900fc, 0x00000707, 0x03030707, + 0xfcfd0707, 0x03ff0707, 0xfc010707, 0xff040707, 0x00fc0707, 0x07070707, 0xf8f90707, 0xfffff8f9, + 0x0302f8f9, 0xfcfcf8f9, 0x03fef8f9, 0xfc00f8f9, 0xff03f8f9, 
0x00fbf8f9, 0x0706f8f9, 0xf8f8f8f9, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000404, 0xfffffbfc, 0x000004ff, 0xfffffb01, 0xffffff05, 0x000000fb, 0x00000a03, + 0xfffff5fd, 0x0000030a, 0xfffffcf6, 0x00000909, 0xfffff6f7, 0x000006f9, 0xfffff907, 0x00000bfd, + 0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001108, 0xffffeef8, 0x00000811, 0xfffff7ef, 0x00001111, + 0xffffeeef, 0x00001301, 0xffffecff, 0x00000113, 0xfffffeed, 0x00000ff5, 0xfffff00b, 0xfffff510, + 0x00000af0, 0x000016fa, 0xffffe906, 0xfffffa17, 0x000005e9, 0x00001f12, 0xffffe0ee, 0x0000121f, + 0xffffede1, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002121, 0xffffdedf, 0x000023ff, + 0xffffdc01, 0xffffff24, 0x000000dc, 0x000016e9, 0xffffe917, 0x00001eef, 0xffffe111, 0xffffef1f, + 0x000010e1, 0x00003615, 0xffffc9eb, 0x00001536, 0xffffeaca, 0x00003725, 0xffffc8db, 0x00002537, + 0xffffdac9, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003908, 0xffffc6f8, 0x00000839, + 0xfffff7c7, 0x00003d3d, 0xffffc2c3, 0x000041fb, 0xffffbe05, 0xfffffb42, 0x000004be, 0x00002cdc, + 0xffffd324, 0xffffdc2d, 0x000023d3, 0x00003be3, 0xffffc41d, 0xffffe33c, 0x00001cc4, 0x00005c2d, + 0xffffa3d3, 0x00002d5c, 0xffffd2a4, 0x00005d19, 0xffffa2e7, 0x0000195d, 0xffffe6a3, 0x00006147, + 0xffff9eb9, 0x00004761, 0xffffb89f, 0x000052ea, 0xffffad16, 0xffffea53, 0x000015ad, 0x00006607, + 0xffff99f9, 0x00000766, 0xfffff89a, 0x00006d6d, 0xffff9293, 0x000043bc, 0xffffbc44, 0x000054c7, + 0xffffab39, 0xffffc755, 0x000038ab, 0x000077f3, 0xffff880d, 0xfffff378, 0x00000c88, 0x00006dcf, + 0xffff9231, 0xffffcf6e, 0x00003092, 0x00007a98, 0xffff8568, 0xffff987b, 0x00006785, 0x00001818, + 0xffffe7e8, 0x00002e2e, 0xffffd1d2, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000, + 0x04ff0000, 0xfb010000, 0xff050000, 0x00fb0000, 0x0a030000, 0xf5fd0000, 0x030a0000, 0x00000404, + 
0x04040404, 0xfbfc0404, 0x04ff0404, 0xfb010404, 0xff050404, 0x00fb0404, 0x0a030404, 0xf5fd0404, + 0x030a0404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x04fefbfc, 0xfb00fbfc, 0xff04fbfc, 0x00fafbfc, + 0x0a02fbfc, 0xf5fcfbfc, 0x0309fbfc, 0x000004ff, 0x040404ff, 0xfbfc04ff, 0x04ff04ff, 0xfb0104ff, + 0xff0504ff, 0x00fb04ff, 0x0a0304ff, 0xf5fd04ff, 0x030a04ff, 0xfffffb01, 0x0403fb01, 0xfbfbfb01, + 0x04fefb01, 0xfb00fb01, 0xff04fb01, 0x00fafb01, 0x0a02fb01, 0xf5fcfb01, 0x0309fb01, 0xffffff05, + 0x0403ff05, 0xfbfbff05, 0x04feff05, 0xfb00ff05, 0xff04ff05, 0x00faff05, 0x0a02ff05, 0xf5fcff05, + 0x0309ff05, 0x000000fb, 0x040400fb, 0xfbfc00fb, 0x04ff00fb, 0xfb0100fb, 0xff0500fb, 0x00fb00fb, + 0x0a0300fb, 0xf5fd00fb, 0x030a00fb, 0x00000a03, 0x04040a03, 0xfbfc0a03, 0x04ff0a03, 0xfb010a03, + 0xff050a03, 0x00fb0a03, 0x0a030a03, 0xf5fd0a03, 0x030a0a03, 0xfffff5fd, 0x0403f5fd, 0xfbfbf5fd, + 0x04fef5fd, 0xfb00f5fd, 0xff04f5fd, 0x00faf5fd, 0x0a02f5fd, 0xf5fcf5fd, 0x0309f5fd, 0x0000030a, + 0x0404030a, 0xfbfc030a, 0x04ff030a, 0xfb01030a, 0xff05030a, 0x00fb030a, 0x0a03030a, 0xf5fd030a, + 0x030a030a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000505, 0xfffffafb, 0x000006fe, 0xfffff902, 0xfffffe07, 0x000001f9, 0x00000b0b, + 0xfffff4f5, 0x00000d03, 0xfffff2fd, 0x0000030d, 0xfffffcf3, 0x000008f7, 0xfffff709, 0x00000efc, + 0xfffff104, 0xfffffc0f, 0x000003f1, 0x0000160b, 0xffffe9f5, 0x00000b16, 0xfffff4ea, 0x00001515, + 0xffffeaeb, 0x00001802, 0xffffe7fe, 0x00000218, 0xfffffde8, 0x000013f2, 0xffffec0e, 0xfffff214, + 0x00000dec, 0x00002617, 0xffffd9e9, 0x00001726, 0xffffe8da, 0x00001cf8, 0xffffe308, 0xfffff81d, + 0x000007e3, 0x0000270b, 0xffffd8f5, 0x00000b27, 0xfffff4d9, 0x00002929, 0xffffd6d7, 0x00002cff, + 0xffffd301, 0xffffff2d, 0x000000d3, 
0x00001ce3, 0xffffe31d, 0x000026ea, 0xffffd916, 0xffffea27, + 0x000015d9, 0x0000431b, 0xffffbce5, 0x00001b43, 0xffffe4bd, 0x0000452f, 0xffffbad1, 0x00002f45, + 0xffffd0bb, 0x000037f1, 0xffffc80f, 0xfffff138, 0x00000ec8, 0x0000470b, 0xffffb8f5, 0x00000b47, + 0xfffff4b9, 0x00004c4c, 0xffffb3b4, 0x000052fa, 0xffffad06, 0xfffffa53, 0x000005ad, 0x000038d3, + 0xffffc72d, 0xffffd339, 0x00002cc7, 0x00004adc, 0xffffb524, 0xffffdc4b, 0x000023b5, 0x00007338, + 0xffff8cc8, 0x00003873, 0xffffc78d, 0x0000751f, 0xffff8ae1, 0x00001f75, 0xffffe08b, 0x00007a58, + 0xffff85a8, 0x0000587a, 0xffffa786, 0x000067e4, 0xffff981c, 0xffffe468, 0x00001b98, 0x000054ab, + 0xffffab55, 0x000069b8, 0xffff9648, 0xffffb86a, 0x00004796, 0x00001e1e, 0xffffe1e2, 0x00003a3a, + 0xffffc5c6, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x06fe0000, 0xf9020000, + 0xfe070000, 0x01f90000, 0x0b0b0000, 0xf4f50000, 0x0d030000, 0xf2fd0000, 0x00000505, 0x05050505, + 0xfafb0505, 0x06fe0505, 0xf9020505, 0xfe070505, 0x01f90505, 0x0b0b0505, 0xf4f50505, 0x0d030505, + 0xf2fd0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x06fdfafb, 0xf901fafb, 0xfe06fafb, 0x01f8fafb, + 0x0b0afafb, 0xf4f4fafb, 0x0d02fafb, 0xf2fcfafb, 0x000006fe, 0x050506fe, 0xfafb06fe, 0x06fe06fe, + 0xf90206fe, 0xfe0706fe, 0x01f906fe, 0x0b0b06fe, 0xf4f506fe, 0x0d0306fe, 0xf2fd06fe, 0xfffff902, + 0x0504f902, 0xfafaf902, 0x06fdf902, 0xf901f902, 0xfe06f902, 0x01f8f902, 0x0b0af902, 0xf4f4f902, + 0x0d02f902, 0xf2fcf902, 0xfffffe07, 0x0504fe07, 0xfafafe07, 0x06fdfe07, 0xf901fe07, 0xfe06fe07, + 0x01f8fe07, 0x0b0afe07, 0xf4f4fe07, 0x0d02fe07, 0xf2fcfe07, 0x000001f9, 0x050501f9, 0xfafb01f9, + 0x06fe01f9, 0xf90201f9, 0xfe0701f9, 0x01f901f9, 0x0b0b01f9, 0xf4f501f9, 0x0d0301f9, 0xf2fd01f9, + 0x00000b0b, 0x05050b0b, 0xfafb0b0b, 0x06fe0b0b, 0xf9020b0b, 0xfe070b0b, 0x01f90b0b, 0x0b0b0b0b, + 0xf4f50b0b, 0x0d030b0b, 0xf2fd0b0b, 0xfffff4f5, 0x0504f4f5, 0xfafaf4f5, 0x06fdf4f5, 0xf901f4f5, + 0xfe06f4f5, 0x01f8f4f5, 0x0b0af4f5, 0xf4f4f4f5, 0x0d02f4f5, 0xf2fcf4f5, 
0x00000d03, 0x05050d03, + 0xfafb0d03, 0x06fe0d03, 0xf9020d03, 0xfe070d03, 0x01f90d03, 0x0b0b0d03, 0xf4f50d03, 0x0d030d03, + 0xf2fd0d03, 0xfffff2fd, 0x0504f2fd, 0xfafaf2fd, 0x06fdf2fd, 0xf901f2fd, 0xfe06f2fd, 0x01f8f2fd, + 0x0b0af2fd, 0xf4f4f2fd, 0x0d02f2fd, 0xf2fcf2fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000606, 0xfffff9fa, 0x000007fe, 0xfffff802, 0xfffffe08, 0x000001f8, 0x00000d0d, + 0xfffff2f3, 0x00000f04, 0xfffff0fc, 0x0000040f, 0xfffffbf1, 0x00000af5, 0xfffff50b, 0x000011fb, + 0xffffee05, 0xfffffb12, 0x000004ee, 0x00001a0d, 0xffffe5f3, 0x00000d1a, 0xfffff2e6, 0x00001a1a, + 0xffffe5e6, 0x00001d02, 0xffffe2fe, 0x0000021d, 0xfffffde3, 0x000017f0, 0xffffe810, 0xfffff018, + 0x00000fe8, 0x00002e1c, 0xffffd1e4, 0x00001c2e, 0xffffe3d2, 0x000022f7, 0xffffdd09, 0xfffff723, + 0x000008dd, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003131, 0xffffcecf, 0x000035ff, + 0xffffca01, 0xffffff36, 0x000000ca, 0x000022dd, 0xffffdd23, 0x00002ee6, 0xffffd11a, 0xffffe62f, + 0x000019d1, 0x00005120, 0xffffaee0, 0x00002051, 0xffffdfaf, 0x00005338, 0xffffacc8, 0x00003853, + 0xffffc7ad, 0x000042ee, 0xffffbd12, 0xffffee43, 0x000011bd, 0x0000560d, 0xffffa9f3, 0x00000d56, + 0xfffff2aa, 0x00005b5b, 0xffffa4a5, 0x000062f9, 0xffff9d07, 0xfffff963, 0x0000069d, 0x000043ca, + 0xffffbc36, 0xffffca44, 0x000035bc, 0x000059d4, 0xffffa62c, 0xffffd45a, 0x00002ba6, 0x00007bdf, + 0xffff8421, 0xffffdf7c, 0x00002084, 0x00006699, 0xffff9967, 0x00007eaa, 0xffff8156, 0xffffaa7f, + 0x00005581, 0x00002525, 0xffffdadb, 0x00004545, 0xffffbabb, 0x00000000, 0x06060000, 0xf9fa0000, + 0x07fe0000, 0xf8020000, 0xfe080000, 0x01f80000, 0x0d0d0000, 0xf2f30000, 0x0f040000, 0xf0fc0000, + 0x040f0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x07fe0606, 0xf8020606, 0xfe080606, 0x01f80606, + 0x0d0d0606, 
0xf2f30606, 0x0f040606, 0xf0fc0606, 0x040f0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa, + 0x07fdf9fa, 0xf801f9fa, 0xfe07f9fa, 0x01f7f9fa, 0x0d0cf9fa, 0xf2f2f9fa, 0x0f03f9fa, 0xf0fbf9fa, + 0x040ef9fa, 0x000007fe, 0x060607fe, 0xf9fa07fe, 0x07fe07fe, 0xf80207fe, 0xfe0807fe, 0x01f807fe, + 0x0d0d07fe, 0xf2f307fe, 0x0f0407fe, 0xf0fc07fe, 0x040f07fe, 0xfffff802, 0x0605f802, 0xf9f9f802, + 0x07fdf802, 0xf801f802, 0xfe07f802, 0x01f7f802, 0x0d0cf802, 0xf2f2f802, 0x0f03f802, 0xf0fbf802, + 0x040ef802, 0xfffffe08, 0x0605fe08, 0xf9f9fe08, 0x07fdfe08, 0xf801fe08, 0xfe07fe08, 0x01f7fe08, + 0x0d0cfe08, 0xf2f2fe08, 0x0f03fe08, 0xf0fbfe08, 0x040efe08, 0x000001f8, 0x060601f8, 0xf9fa01f8, + 0x07fe01f8, 0xf80201f8, 0xfe0801f8, 0x01f801f8, 0x0d0d01f8, 0xf2f301f8, 0x0f0401f8, 0xf0fc01f8, + 0x040f01f8, 0x00000d0d, 0x06060d0d, 0xf9fa0d0d, 0x07fe0d0d, 0xf8020d0d, 0xfe080d0d, 0x01f80d0d, + 0x0d0d0d0d, 0xf2f30d0d, 0x0f040d0d, 0xf0fc0d0d, 0x040f0d0d, 0xfffff2f3, 0x0605f2f3, 0xf9f9f2f3, + 0x07fdf2f3, 0xf801f2f3, 0xfe07f2f3, 0x01f7f2f3, 0x0d0cf2f3, 0xf2f2f2f3, 0x0f03f2f3, 0xf0fbf2f3, + 0x040ef2f3, 0x00000f04, 0x06060f04, 0xf9fa0f04, 0x07fe0f04, 0xf8020f04, 0xfe080f04, 0x01f80f04, + 0x0d0d0f04, 0xf2f30f04, 0x0f040f04, 0xf0fc0f04, 0x040f0f04, 0xfffff0fc, 0x0605f0fc, 0xf9f9f0fc, + 0x07fdf0fc, 0xf801f0fc, 0xfe07f0fc, 0x01f7f0fc, 0x0d0cf0fc, 0xf2f2f0fc, 0x0f03f0fc, 0xf0fbf0fc, + 0x040ef0fc, 0x0000040f, 0x0606040f, 0xf9fa040f, 0x07fe040f, 0xf802040f, 0xfe08040f, 0x01f8040f, + 0x0d0d040f, 0xf2f3040f, 0x0f04040f, 0xf0fc040f, 0x040f040f, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000707, 0xfffff8f9, 0x000009fd, 0xfffff603, 0xfffffd0a, 0x000002f6, 0x00001010, + 0xffffeff0, 0x00001205, 0xffffedfb, 0x00000512, 0xfffffaee, 0x00000cf3, 0xfffff30d, 0x000014fa, + 0xffffeb06, 0xfffffa15, 0x000005eb, 0x00001e0f, 0xffffe1f1, 0x00000f1e, 0xfffff0e2, 0x00001e1e, + 0xffffe1e2, 0x00002202, 0xffffddfe, 0x00000222, 
0xfffffdde, 0x00001bed, 0xffffe413, 0xffffed1c, + 0x000012e4, 0x00003620, 0xffffc9e0, 0x00002036, 0xffffdfca, 0x000028f5, 0xffffd70b, 0xfffff529, + 0x00000ad7, 0x0000370f, 0xffffc8f1, 0x00000f37, 0xfffff0c9, 0x00003939, 0xffffc6c7, 0x00003eff, + 0xffffc101, 0xffffff3f, 0x000000c1, 0x000027d8, 0xffffd828, 0x000036e2, 0xffffc91e, 0xffffe237, + 0x00001dc9, 0x00005e25, 0xffffa1db, 0x0000255e, 0xffffdaa2, 0x00006041, 0xffff9fbf, 0x00004160, + 0xffffbea0, 0x00004deb, 0xffffb215, 0xffffeb4e, 0x000014b2, 0x0000640f, 0xffff9bf1, 0x00000f64, + 0xfffff09c, 0x00006a6a, 0xffff9596, 0x000073f8, 0xffff8c08, 0xfffff874, 0x0000078c, 0x00004ec1, + 0xffffb13f, 0xffffc14f, 0x00003eb1, 0x000068cd, 0xffff9733, 0xffffcd69, 0x00003297, 0x00007788, + 0xffff8878, 0x00002b2b, 0xffffd4d5, 0x00005050, 0xffffafb0, 0x00000000, 0x07070000, 0xf8f90000, + 0x09fd0000, 0xf6030000, 0xfd0a0000, 0x02f60000, 0x10100000, 0xeff00000, 0x12050000, 0xedfb0000, + 0x05120000, 0x00000707, 0x07070707, 0xf8f90707, 0x09fd0707, 0xf6030707, 0xfd0a0707, 0x02f60707, + 0x10100707, 0xeff00707, 0x12050707, 0xedfb0707, 0x05120707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9, + 0x09fcf8f9, 0xf602f8f9, 0xfd09f8f9, 0x02f5f8f9, 0x100ff8f9, 0xefeff8f9, 0x1204f8f9, 0xedfaf8f9, + 0x0511f8f9, 0x000009fd, 0x070709fd, 0xf8f909fd, 0x09fd09fd, 0xf60309fd, 0xfd0a09fd, 0x02f609fd, + 0x101009fd, 0xeff009fd, 0x120509fd, 0xedfb09fd, 0x051209fd, 0xfffff603, 0x0706f603, 0xf8f8f603, + 0x09fcf603, 0xf602f603, 0xfd09f603, 0x02f5f603, 0x100ff603, 0xefeff603, 0x1204f603, 0xedfaf603, + 0x0511f603, 0xfffffd0a, 0x0706fd0a, 0xf8f8fd0a, 0x09fcfd0a, 0xf602fd0a, 0xfd09fd0a, 0x02f5fd0a, + 0x100ffd0a, 0xefeffd0a, 0x1204fd0a, 0xedfafd0a, 0x0511fd0a, 0x000002f6, 0x070702f6, 0xf8f902f6, + 0x09fd02f6, 0xf60302f6, 0xfd0a02f6, 0x02f602f6, 0x101002f6, 0xeff002f6, 0x120502f6, 0xedfb02f6, + 0x051202f6, 0x00001010, 0x07071010, 0xf8f91010, 0x09fd1010, 0xf6031010, 0xfd0a1010, 0x02f61010, + 0x10101010, 0xeff01010, 0x12051010, 0xedfb1010, 0x05121010, 0xffffeff0, 0x0706eff0, 
0xf8f8eff0, + 0x09fceff0, 0xf602eff0, 0xfd09eff0, 0x02f5eff0, 0x100feff0, 0xefefeff0, 0x1204eff0, 0xedfaeff0, + 0x0511eff0, 0x00001205, 0x07071205, 0xf8f91205, 0x09fd1205, 0xf6031205, 0xfd0a1205, 0x02f61205, + 0x10101205, 0xeff01205, 0x12051205, 0xedfb1205, 0x05121205, 0xffffedfb, 0x0706edfb, 0xf8f8edfb, + 0x09fcedfb, 0xf602edfb, 0xfd09edfb, 0x02f5edfb, 0x100fedfb, 0xefefedfb, 0x1204edfb, 0xedfaedfb, + 0x0511edfb, 0x00000512, 0x07070512, 0xf8f90512, 0x09fd0512, 0xf6030512, 0xfd0a0512, 0x02f60512, + 0x10100512, 0xeff00512, 0x12050512, 0xedfb0512, 0x05120512, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000808, 0xfffff7f8, 0x00000afd, 0xfffff503, 0xfffffd0b, 0x000002f5, 0x00001212, + 0xffffedee, 0x00001405, 0xffffebfb, 0x00000514, 0xfffffaec, 0x00000ef1, 0xfffff10f, 0x000017f9, + 0xffffe807, 0xfffff918, 0x000006e8, 0x00002311, 0xffffdcef, 0x00001123, 0xffffeedd, 0x00002222, + 0xffffddde, 0x00002603, 0xffffd9fd, 0x00000326, 0xfffffcda, 0x00001fea, 0xffffe016, 0xffffea20, + 0x000015e0, 0x00003d25, 0xffffc2db, 0x0000253d, 0xffffdac3, 0x00002ef3, 0xffffd10d, 0xfffff32f, + 0x00000cd1, 0x00003f11, 0xffffc0ef, 0x0000113f, 0xffffeec1, 0x00004141, 0xffffbebf, 0x000047ff, + 0xffffb801, 0xffffff48, 0x000000b8, 0x00002dd2, 0xffffd22e, 0x00003edd, 0xffffc123, 0xffffdd3f, + 0x000022c1, 0x00006b2b, 0xffff94d5, 0x00002b6b, 0xffffd495, 0x00006e4b, 0xffff91b5, 0x00004b6e, + 0xffffb492, 0x000058e8, 0xffffa718, 0xffffe859, 0x000017a7, 0x00007211, 0xffff8def, 0x00001172, + 0xffffee8e, 0x00007979, 0xffff8687, 0x00005ab8, 0xffffa548, 0xffffb85b, 0x000047a5, 0x000077c6, + 0xffff883a, 0xffffc678, 0x00003988, 0x00003131, 0xffffcecf, 0x00005c5c, 0xffffa3a4, 0x00000000, + 0x08080000, 0xf7f80000, 0x0afd0000, 0xf5030000, 0xfd0b0000, 0x02f50000, 0x12120000, 0xedee0000, + 0x14050000, 0xebfb0000, 
0x05140000, 0x00000808, 0x08080808, 0xf7f80808, 0x0afd0808, 0xf5030808, + 0xfd0b0808, 0x02f50808, 0x12120808, 0xedee0808, 0x14050808, 0xebfb0808, 0x05140808, 0xfffff7f8, + 0x0807f7f8, 0xf7f7f7f8, 0x0afcf7f8, 0xf502f7f8, 0xfd0af7f8, 0x02f4f7f8, 0x1211f7f8, 0xededf7f8, + 0x1404f7f8, 0xebfaf7f8, 0x0513f7f8, 0x00000afd, 0x08080afd, 0xf7f80afd, 0x0afd0afd, 0xf5030afd, + 0xfd0b0afd, 0x02f50afd, 0x12120afd, 0xedee0afd, 0x14050afd, 0xebfb0afd, 0x05140afd, 0xfffff503, + 0x0807f503, 0xf7f7f503, 0x0afcf503, 0xf502f503, 0xfd0af503, 0x02f4f503, 0x1211f503, 0xededf503, + 0x1404f503, 0xebfaf503, 0x0513f503, 0xfffffd0b, 0x0807fd0b, 0xf7f7fd0b, 0x0afcfd0b, 0xf502fd0b, + 0xfd0afd0b, 0x02f4fd0b, 0x1211fd0b, 0xededfd0b, 0x1404fd0b, 0xebfafd0b, 0x0513fd0b, 0x000002f5, + 0x080802f5, 0xf7f802f5, 0x0afd02f5, 0xf50302f5, 0xfd0b02f5, 0x02f502f5, 0x121202f5, 0xedee02f5, + 0x140502f5, 0xebfb02f5, 0x051402f5, 0x00001212, 0x08081212, 0xf7f81212, 0x0afd1212, 0xf5031212, + 0xfd0b1212, 0x02f51212, 0x12121212, 0xedee1212, 0x14051212, 0xebfb1212, 0x05141212, 0xffffedee, + 0x0807edee, 0xf7f7edee, 0x0afcedee, 0xf502edee, 0xfd0aedee, 0x02f4edee, 0x1211edee, 0xedededee, + 0x1404edee, 0xebfaedee, 0x0513edee, 0x00001405, 0x08081405, 0xf7f81405, 0x0afd1405, 0xf5031405, + 0xfd0b1405, 0x02f51405, 0x12121405, 0xedee1405, 0x14051405, 0xebfb1405, 0x05141405, 0xffffebfb, + 0x0807ebfb, 0xf7f7ebfb, 0x0afcebfb, 0xf502ebfb, 0xfd0aebfb, 0x02f4ebfb, 0x1211ebfb, 0xededebfb, + 0x1404ebfb, 0xebfaebfb, 0x0513ebfb, 0x00000514, 0x08080514, 0xf7f80514, 0x0afd0514, 0xf5030514, + 0xfd0b0514, 0x02f50514, 0x12120514, 0xedee0514, 0x14050514, 0xebfb0514, 0x05140514, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000909, 0xfffff6f7, 0x00000bfd, 0xfffff403, 
0xfffffd0c, 0x000002f4, 0x00001414, + 0xffffebec, 0x00001706, 0xffffe8fa, 0x00000617, 0xfffff9e9, 0x000010ef, 0xffffef11, 0x00001af9, + 0xffffe507, 0xfffff91b, 0x000006e5, 0x00002713, 0xffffd8ed, 0x00001327, 0xffffecd9, 0x00002727, + 0xffffd8d9, 0x00002b03, 0xffffd4fd, 0x0000032b, 0xfffffcd5, 0x000023e8, 0xffffdc18, 0xffffe824, + 0x000017dc, 0x0000452a, 0xffffbad6, 0x00002a45, 0xffffd5bb, 0x000034f2, 0xffffcb0e, 0xfffff235, + 0x00000dcb, 0x00004713, 0xffffb8ed, 0x00001347, 0xffffecb9, 0x00004949, 0xffffb6b7, 0x00004ffe, + 0xffffb002, 0xfffffe50, 0x000001b0, 0x000033cc, 0xffffcc34, 0x000045d9, 0xffffba27, 0xffffd946, + 0x000026ba, 0x00007930, 0xffff86d0, 0x00003079, 0xffffcf87, 0x00007c54, 0xffff83ac, 0x0000547c, + 0xffffab84, 0x000063e5, 0xffff9c1b, 0xffffe564, 0x00001a9c, 0x000065af, 0xffff9a51, 0xffffaf66, + 0x0000509a, 0x00003737, 0xffffc8c9, 0x00006868, 0xffff9798, 0x00000000, 0x09090000, 0xf6f70000, + 0x0bfd0000, 0xf4030000, 0xfd0c0000, 0x02f40000, 0x14140000, 0xebec0000, 0x17060000, 0xe8fa0000, + 0x06170000, 0xf9e90000, 0x00000909, 0x09090909, 0xf6f70909, 0x0bfd0909, 0xf4030909, 0xfd0c0909, + 0x02f40909, 0x14140909, 0xebec0909, 0x17060909, 0xe8fa0909, 0x06170909, 0xf9e90909, 0xfffff6f7, + 0x0908f6f7, 0xf6f6f6f7, 0x0bfcf6f7, 0xf402f6f7, 0xfd0bf6f7, 0x02f3f6f7, 0x1413f6f7, 0xebebf6f7, + 0x1705f6f7, 0xe8f9f6f7, 0x0616f6f7, 0xf9e8f6f7, 0x00000bfd, 0x09090bfd, 0xf6f70bfd, 0x0bfd0bfd, + 0xf4030bfd, 0xfd0c0bfd, 0x02f40bfd, 0x14140bfd, 0xebec0bfd, 0x17060bfd, 0xe8fa0bfd, 0x06170bfd, + 0xf9e90bfd, 0xfffff403, 0x0908f403, 0xf6f6f403, 0x0bfcf403, 0xf402f403, 0xfd0bf403, 0x02f3f403, + 0x1413f403, 0xebebf403, 0x1705f403, 0xe8f9f403, 0x0616f403, 0xf9e8f403, 0xfffffd0c, 0x0908fd0c, + 0xf6f6fd0c, 0x0bfcfd0c, 0xf402fd0c, 0xfd0bfd0c, 0x02f3fd0c, 0x1413fd0c, 0xebebfd0c, 0x1705fd0c, + 0xe8f9fd0c, 0x0616fd0c, 0xf9e8fd0c, 0x000002f4, 0x090902f4, 0xf6f702f4, 0x0bfd02f4, 0xf40302f4, + 0xfd0c02f4, 0x02f402f4, 0x141402f4, 0xebec02f4, 0x170602f4, 0xe8fa02f4, 0x061702f4, 0xf9e902f4, + 
0x00001414, 0x09091414, 0xf6f71414, 0x0bfd1414, 0xf4031414, 0xfd0c1414, 0x02f41414, 0x14141414, + 0xebec1414, 0x17061414, 0xe8fa1414, 0x06171414, 0xf9e91414, 0xffffebec, 0x0908ebec, 0xf6f6ebec, + 0x0bfcebec, 0xf402ebec, 0xfd0bebec, 0x02f3ebec, 0x1413ebec, 0xebebebec, 0x1705ebec, 0xe8f9ebec, + 0x0616ebec, 0xf9e8ebec, 0x00001706, 0x09091706, 0xf6f71706, 0x0bfd1706, 0xf4031706, 0xfd0c1706, + 0x02f41706, 0x14141706, 0xebec1706, 0x17061706, 0xe8fa1706, 0x06171706, 0xf9e91706, 0xffffe8fa, + 0x0908e8fa, 0xf6f6e8fa, 0x0bfce8fa, 0xf402e8fa, 0xfd0be8fa, 0x02f3e8fa, 0x1413e8fa, 0xebebe8fa, + 0x1705e8fa, 0xe8f9e8fa, 0x0616e8fa, 0xf9e8e8fa, 0x00000617, 0x09090617, 0xf6f70617, 0x0bfd0617, + 0xf4030617, 0xfd0c0617, 0x02f40617, 0x14140617, 0xebec0617, 0x17060617, 0xe8fa0617, 0x06170617, + 0xf9e90617, 0xfffff9e9, 0x0908f9e9, 0xf6f6f9e9, 0x0bfcf9e9, 0xf402f9e9, 0xfd0bf9e9, 0x02f3f9e9, + 0x1413f9e9, 0xebebf9e9, 0x1705f9e9, 0xe8f9f9e9, 0x0616f9e9, 0xf9e8f9e9, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, + 0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x000003fc, 0xfffffc04, 0x000005fe, + 0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000804, 0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, + 0xfffff7f8, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000007fc, 0xfffff804, 0xfffffc08, + 0x000003f8, 0x00000e08, 0xfffff1f8, 0x0000080e, 0xfffff7f2, 0x00000bfe, 0xfffff402, 0xfffffe0c, + 0x000001f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200, + 0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff8, 0xfffff008, 0xfffff810, + 0x000007f0, 0x00001a0a, 0xffffe5f6, 0x00000a1a, 0xfffff5e6, 0x00001c12, 0xffffe3ee, 0x0000121c, + 0xffffede4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001c04, 0xffffe3fc, 0x0000041c, + 0xfffffbe4, 0x00001e1e, 0xffffe1e2, 
0x00001ffe, 0xffffe002, 0xfffffe20, 0x000001e0, 0x000015ee, + 0xffffea12, 0xffffee16, 0x000011ea, 0x00001df2, 0xffffe20e, 0xfffff21e, 0x00000de2, 0x00002e16, + 0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002e0c, 0xffffd1f4, 0x00000c2e, 0xfffff3d2, 0x00003022, + 0xffffcfde, 0x00002230, 0xffffddd0, 0x000027f6, 0xffffd80a, 0xfffff628, 0x000009d8, 0x00003204, + 0xffffcdfc, 0x00000432, 0xfffffbce, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e4, + 0xffffd61c, 0xffffe42a, 0x00001bd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1a, + 0xffffb3e6, 0x00001a4c, 0xffffe5b4, 0x00004c2a, 0xffffb3d6, 0x00002a4c, 0xffffd5b4, 0x000035e8, + 0xffffca18, 0xffffe836, 0x000017ca, 0x00004e0e, 0xffffb1f2, 0x00000e4e, 0xfffff1b2, 0x0000523e, + 0xffffadc2, 0x00003e52, 0xffffc1ae, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802, + 0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005c5c, 0xffffa3a4, 0x00003bcc, 0xffffc434, 0xffffcc3c, + 0x000033c4, 0x00007634, 0xffff89cc, 0x00003476, 0xffffcb8a, 0x000049d4, 0xffffb62c, 0xffffd44a, + 0x00002bb6, 0x0000764a, 0xffff89b6, 0x00004a76, 0xffffb58a, 0x00007620, 0xffff89e0, 0x00002076, + 0xffffdf8a, 0x000065f4, 0xffff9a0c, 0xfffff466, 0x00000b9a, 0x00005fd8, 0xffffa028, 0xffffd860, + 0x000027a0, 0x000075de, 0xffff8a22, 0xffffde76, 0x0000218a, 0x000057a8, 0xffffa858, 0x000067b2, + 0xffff984e, 0xffffb268, 0x00004d98, 0x00000c0c, 0xfffff3f4, 0x00001616, 0xffffe9ea, 0x00002a2a, + 0xffffd5d6, 0x00004848, 0xffffb7b8, 0x00000000, 0x02020000, 0xfdfe0000, 0x02000000, 0xfe000000, + 0x00020000, 0xfffe0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02000202, 0xfe000202, 0x00020202, + 0xfffe0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x01fffdfe, 0xfdfffdfe, 0x0001fdfe, 0xfffdfdfe, + 0x00000200, 0x02020200, 0xfdfe0200, 0x02000200, 0xfe000200, 0x00020200, 0xfffe0200, 0xfffffe00, + 0x0201fe00, 0xfdfdfe00, 0x01fffe00, 0xfdfffe00, 0x0001fe00, 0xfffdfe00, 0x00000002, 0x02020002, + 0xfdfe0002, 0x02000002, 0xfe000002, 0x00020002, 0xfffe0002, 0xfffffffe, 
0x0201fffe, 0xfdfdfffe, + 0x01fffffe, 0xfdfffffe, 0x0001fffe, 0xfffdfffe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000303, 0xfffffcfd, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, + 0xfffff9fa, 0x00000903, 0xfffff6fd, 0x00000309, 0xfffffcf7, 0x000008fd, 0xfffff703, 0xfffffd09, + 0x000002f7, 0x000005fa, 0xfffffa06, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000c0c, + 0xfffff3f4, 0x00000f00, 0xfffff100, 0x0000000f, 0xfffffff1, 0x00000bf7, 0xfffff409, 0xfffff70c, + 0x000008f4, 0x0000180f, 0xffffe7f1, 0x00000f18, 0xfffff0e8, 0x000011fa, 0xffffee06, 0xfffffa12, + 0x000005ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001b00, + 0xffffe500, 0x0000001b, 0xffffffe5, 0x000011ee, 0xffffee12, 0x000017f4, 0xffffe80c, 0xfffff418, + 0x00000be8, 0x0000270f, 0xffffd8f1, 0x00000f27, 0xfffff0d9, 0x00002a1b, 0xffffd5e5, 0x00001b2a, + 0xffffe4d6, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002a06, 0xffffd5fa, 0x0000062a, + 0xfffff9d6, 0x00002d2d, 0xffffd2d3, 0x000032fd, 0xffffcd03, 0xfffffd33, 0x000002cd, 0x000020e5, + 0xffffdf1b, 0xffffe521, 0x00001adf, 0x00002ceb, 0xffffd315, 0xffffeb2d, 0x000014d3, 0x00004521, + 0xffffbadf, 0x00002145, 0xffffdebb, 0x00004512, 0xffffbaee, 0x00001245, 0xffffedbb, 0x00004836, + 0xffffb7ca, 0x00003648, 0xffffc9b8, 0x00003eee, 0xffffc112, 0xffffee3f, 0x000011c1, 0x00004e06, + 0xffffb1fa, 0x0000064e, 0xfffff9b2, 0x00005151, 0xffffaeaf, 0x000032cd, 0xffffcd33, 0x00003ed6, + 0xffffc12a, 0xffffd63f, 0x000029c1, 0x000059f7, 0xffffa609, 0xfffff75a, 0x000008a6, 0x0000722a, + 0xffff8dd6, 0x00002a72, 0xffffd58e, 0x0000753f, 0xffff8ac1, 0x00003f75, 0xffffc08b, 0x000050dc, + 0xffffaf24, 0xffffdc51, 0x000023af, 0x00007815, 0xffff87eb, 0x00001578, 0xffffea88, 0x00007b60, + 0xffff84a0, 0x0000607b, 0xffff9f85, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005cb2, + 0xffffa34e, 
0xffffb25d, 0x00004da3, 0x000071bb, 0xffff8e45, 0xffffbb72, 0x0000448e, 0x00001212, + 0xffffedee, 0x00002121, 0xffffdedf, 0x00003f3f, 0xffffc0c1, 0x00006c6c, 0xffff9394, 0x00000000, + 0x03030000, 0xfcfd0000, 0x03000000, 0xfd000000, 0x00030000, 0xfffd0000, 0x06060000, 0xf9fa0000, + 0x00000303, 0x03030303, 0xfcfd0303, 0x03000303, 0xfd000303, 0x00030303, 0xfffd0303, 0x06060303, + 0xf9fa0303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x02fffcfd, 0xfcfffcfd, 0x0002fcfd, 0xfffcfcfd, + 0x0605fcfd, 0xf9f9fcfd, 0x00000300, 0x03030300, 0xfcfd0300, 0x03000300, 0xfd000300, 0x00030300, + 0xfffd0300, 0x06060300, 0xf9fa0300, 0xfffffd00, 0x0302fd00, 0xfcfcfd00, 0x02fffd00, 0xfcfffd00, + 0x0002fd00, 0xfffcfd00, 0x0605fd00, 0xf9f9fd00, 0x00000003, 0x03030003, 0xfcfd0003, 0x03000003, + 0xfd000003, 0x00030003, 0xfffd0003, 0x06060003, 0xf9fa0003, 0xfffffffd, 0x0302fffd, 0xfcfcfffd, + 0x02fffffd, 0xfcfffffd, 0x0002fffd, 0xfffcfffd, 0x0605fffd, 0xf9f9fffd, 0x00000606, 0x03030606, + 0xfcfd0606, 0x03000606, 0xfd000606, 0x00030606, 0xfffd0606, 0x06060606, 0xf9fa0606, 0xfffff9fa, + 0x0302f9fa, 0xfcfcf9fa, 0x02fff9fa, 0xfcfff9fa, 0x0002f9fa, 0xfffcf9fa, 0x0605f9fa, 0xf9f9f9fa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000404, 0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000804, + 0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, 0xfffff7f8, 0x000007f8, 0xfffff808, 0x00000bfc, + 0xfffff404, 0xfffffc0c, 0x000003f4, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00001010, + 0xffffeff0, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00000ff4, 0xfffff00c, 0xfffff410, + 0x00000bf0, 0x000017fc, 0xffffe804, 0xfffffc18, 0x000003e8, 0x00002010, 0xffffdff0, 0x00001020, + 0xffffefe0, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002020, 0xffffdfe0, 0x00002400, + 0xffffdc00, 0x00000024, 0xffffffdc, 0x000017e8, 
0xffffe818, 0x00001ff0, 0xffffe010, 0xfffff020, + 0x00000fe0, 0x00003414, 0xffffcbec, 0x00001434, 0xffffebcc, 0x00003824, 0xffffc7dc, 0x00002438, + 0xffffdbc8, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003808, 0xffffc7f8, 0x00000838, + 0xfffff7c8, 0x00003c3c, 0xffffc3c4, 0x00003ffc, 0xffffc004, 0xfffffc40, 0x000003c0, 0x00002bdc, + 0xffffd424, 0xffffdc2c, 0x000023d4, 0x00003be4, 0xffffc41c, 0xffffe43c, 0x00001bc4, 0x00005c2c, + 0xffffa3d4, 0x00002c5c, 0xffffd3a4, 0x00005c18, 0xffffa3e8, 0x0000185c, 0xffffe7a4, 0x00006048, + 0xffff9fb8, 0x00004860, 0xffffb7a0, 0x000053ec, 0xffffac14, 0xffffec54, 0x000013ac, 0x00006408, + 0xffff9bf8, 0x00000864, 0xfffff79c, 0x00006c6c, 0xffff9394, 0x000043bc, 0xffffbc44, 0x000053c8, + 0xffffac38, 0xffffc854, 0x000037ac, 0x000077f4, 0xffff880c, 0xfffff478, 0x00000b88, 0x00006bd0, + 0xffff9430, 0xffffd06c, 0x00002f94, 0x00007b98, 0xffff8468, 0xffff987c, 0x00006784, 0x00001818, + 0xffffe7e8, 0x00002c2c, 0xffffd3d4, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000, + 0x04000000, 0xfc000000, 0x00040000, 0xfffc0000, 0x08040000, 0xf7fc0000, 0x04080000, 0x00000404, + 0x04040404, 0xfbfc0404, 0x04000404, 0xfc000404, 0x00040404, 0xfffc0404, 0x08040404, 0xf7fc0404, + 0x04080404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x03fffbfc, 0xfbfffbfc, 0x0003fbfc, 0xfffbfbfc, + 0x0803fbfc, 0xf7fbfbfc, 0x0407fbfc, 0x00000400, 0x04040400, 0xfbfc0400, 0x04000400, 0xfc000400, + 0x00040400, 0xfffc0400, 0x08040400, 0xf7fc0400, 0x04080400, 0xfffffc00, 0x0403fc00, 0xfbfbfc00, + 0x03fffc00, 0xfbfffc00, 0x0003fc00, 0xfffbfc00, 0x0803fc00, 0xf7fbfc00, 0x0407fc00, 0x00000004, + 0x04040004, 0xfbfc0004, 0x04000004, 0xfc000004, 0x00040004, 0xfffc0004, 0x08040004, 0xf7fc0004, + 0x04080004, 0xfffffffc, 0x0403fffc, 0xfbfbfffc, 0x03fffffc, 0xfbfffffc, 0x0003fffc, 0xfffbfffc, + 0x0803fffc, 0xf7fbfffc, 0x0407fffc, 0x00000804, 0x04040804, 0xfbfc0804, 0x04000804, 0xfc000804, + 0x00040804, 0xfffc0804, 0x08040804, 0xf7fc0804, 0x04080804, 0xfffff7fc, 0x0403f7fc, 
0xfbfbf7fc, + 0x03fff7fc, 0xfbfff7fc, 0x0003f7fc, 0xfffbf7fc, 0x0803f7fc, 0xf7fbf7fc, 0x0407f7fc, 0x00000408, + 0x04040408, 0xfbfc0408, 0x04000408, 0xfc000408, 0x00040408, 0xfffc0408, 0x08040408, 0xf7fc0408, + 0x04080408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000505, 0xfffffafb, 0x00000500, 0xfffffb00, 0x00000005, 0xfffffffb, 0x00000a0a, + 0xfffff5f6, 0x00000f05, 0xfffff0fb, 0x0000050f, 0xfffffaf1, 0x000009f6, 0xfffff60a, 0x00000efb, + 0xfffff105, 0xfffffb0f, 0x000004f1, 0x0000140a, 0xffffebf6, 0x00000a14, 0xfffff5ec, 0x00001414, + 0xffffebec, 0x00001900, 0xffffe700, 0x00000019, 0xffffffe7, 0x000013f1, 0xffffec0f, 0xfffff114, + 0x00000eec, 0x00002819, 0xffffd7e7, 0x00001928, 0xffffe6d8, 0x00001df6, 0xffffe20a, 0xfffff61e, + 0x000009e2, 0x0000280a, 0xffffd7f6, 0x00000a28, 0xfffff5d8, 0x00002828, 0xffffd7d8, 0x00002d00, + 0xffffd300, 0x0000002d, 0xffffffd3, 0x00001de2, 0xffffe21e, 0x000027ec, 0xffffd814, 0xffffec28, + 0x000013d8, 0x00004119, 0xffffbee7, 0x00001941, 0xffffe6bf, 0x0000462d, 0xffffb9d3, 0x00002d46, + 0xffffd2ba, 0x000036f1, 0xffffc90f, 0xfffff137, 0x00000ec9, 0x0000460a, 0xffffb9f6, 0x00000a46, + 0xfffff5ba, 0x00004b4b, 0xffffb4b5, 0x000054fb, 0xffffab05, 0xfffffb55, 0x000004ab, 0x000036d3, + 0xffffc92d, 0xffffd337, 0x00002cc9, 0x00004add, 0xffffb523, 0xffffdd4b, 0x000022b5, 0x00007337, + 0xffff8cc9, 0x00003773, 0xffffc88d, 0x0000731e, 0xffff8ce2, 0x00001e73, 0xffffe18d, 0x0000785a, + 0xffff87a6, 0x00005a78, 0xffffa588, 0x000068e2, 0xffff971e, 0xffffe269, 0x00001d97, 0x000054ab, + 0xffffab55, 0x000068ba, 0xffff9746, 0xffffba69, 0x00004597, 0x00001e1e, 0xffffe1e2, 0x00003c3c, + 0xffffc3c4, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x05000000, 0xfb000000, + 0x00050000, 0xfffb0000, 
0x0a0a0000, 0xf5f60000, 0x0f050000, 0xf0fb0000, 0x00000505, 0x05050505, + 0xfafb0505, 0x05000505, 0xfb000505, 0x00050505, 0xfffb0505, 0x0a0a0505, 0xf5f60505, 0x0f050505, + 0xf0fb0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x04fffafb, 0xfafffafb, 0x0004fafb, 0xfffafafb, + 0x0a09fafb, 0xf5f5fafb, 0x0f04fafb, 0xf0fafafb, 0x00000500, 0x05050500, 0xfafb0500, 0x05000500, + 0xfb000500, 0x00050500, 0xfffb0500, 0x0a0a0500, 0xf5f60500, 0x0f050500, 0xf0fb0500, 0xfffffb00, + 0x0504fb00, 0xfafafb00, 0x04fffb00, 0xfafffb00, 0x0004fb00, 0xfffafb00, 0x0a09fb00, 0xf5f5fb00, + 0x0f04fb00, 0xf0fafb00, 0x00000005, 0x05050005, 0xfafb0005, 0x05000005, 0xfb000005, 0x00050005, + 0xfffb0005, 0x0a0a0005, 0xf5f60005, 0x0f050005, 0xf0fb0005, 0xfffffffb, 0x0504fffb, 0xfafafffb, + 0x04fffffb, 0xfafffffb, 0x0004fffb, 0xfffafffb, 0x0a09fffb, 0xf5f5fffb, 0x0f04fffb, 0xf0fafffb, + 0x00000a0a, 0x05050a0a, 0xfafb0a0a, 0x05000a0a, 0xfb000a0a, 0x00050a0a, 0xfffb0a0a, 0x0a0a0a0a, + 0xf5f60a0a, 0x0f050a0a, 0xf0fb0a0a, 0xfffff5f6, 0x0504f5f6, 0xfafaf5f6, 0x04fff5f6, 0xfafff5f6, + 0x0004f5f6, 0xfffaf5f6, 0x0a09f5f6, 0xf5f5f5f6, 0x0f04f5f6, 0xf0faf5f6, 0x00000f05, 0x05050f05, + 0xfafb0f05, 0x05000f05, 0xfb000f05, 0x00050f05, 0xfffb0f05, 0x0a0a0f05, 0xf5f60f05, 0x0f050f05, + 0xf0fb0f05, 0xfffff0fb, 0x0504f0fb, 0xfafaf0fb, 0x04fff0fb, 0xfafff0fb, 0x0004f0fb, 0xfffaf0fb, + 0x0a09f0fb, 0xf5f5f0fb, 0x0f04f0fb, 0xf0faf0fb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000606, 0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x00000c0c, + 0xfffff3f4, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000bf4, 0xfffff40c, 0x000011fa, + 0xffffee06, 0xfffffa12, 0x000005ee, 0x0000180c, 0xffffe7f4, 0x00000c18, 0xfffff3e8, 0x00001818, + 0xffffe7e8, 0x00001e00, 0xffffe200, 0x0000001e, 0xffffffe2, 
0x000017ee, 0xffffe812, 0xffffee18, + 0x000011e8, 0x0000301e, 0xffffcfe2, 0x00001e30, 0xffffe1d0, 0x000023fa, 0xffffdc06, 0xfffffa24, + 0x000005dc, 0x0000300c, 0xffffcff4, 0x00000c30, 0xfffff3d0, 0x00003030, 0xffffcfd0, 0x00003600, + 0xffffca00, 0x00000036, 0xffffffca, 0x000023dc, 0xffffdc24, 0x00002fe8, 0xffffd018, 0xffffe830, + 0x000017d0, 0x00004e1e, 0xffffb1e2, 0x00001e4e, 0xffffe1b2, 0x00005436, 0xffffabca, 0x00003654, + 0xffffc9ac, 0x000041ee, 0xffffbe12, 0xffffee42, 0x000011be, 0x0000540c, 0xffffabf4, 0x00000c54, + 0xfffff3ac, 0x00005a5a, 0xffffa5a6, 0x00005ffa, 0xffffa006, 0xfffffa60, 0x000005a0, 0x000041ca, + 0xffffbe36, 0xffffca42, 0x000035be, 0x000059d6, 0xffffa62a, 0xffffd65a, 0x000029a6, 0x00007de2, + 0xffff821e, 0xffffe27e, 0x00001d82, 0x0000659a, 0xffff9a66, 0x00007dac, 0xffff8254, 0xffffac7e, + 0x00005382, 0x00002424, 0xffffdbdc, 0x00004242, 0xffffbdbe, 0x00000000, 0x06060000, 0xf9fa0000, + 0x06000000, 0xfa000000, 0x00060000, 0xfffa0000, 0x0c0c0000, 0xf3f40000, 0x0c060000, 0xf3fa0000, + 0x060c0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x06000606, 0xfa000606, 0x00060606, 0xfffa0606, + 0x0c0c0606, 0xf3f40606, 0x0c060606, 0xf3fa0606, 0x060c0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa, + 0x05fff9fa, 0xf9fff9fa, 0x0005f9fa, 0xfff9f9fa, 0x0c0bf9fa, 0xf3f3f9fa, 0x0c05f9fa, 0xf3f9f9fa, + 0x060bf9fa, 0x00000600, 0x06060600, 0xf9fa0600, 0x06000600, 0xfa000600, 0x00060600, 0xfffa0600, + 0x0c0c0600, 0xf3f40600, 0x0c060600, 0xf3fa0600, 0x060c0600, 0xfffffa00, 0x0605fa00, 0xf9f9fa00, + 0x05fffa00, 0xf9fffa00, 0x0005fa00, 0xfff9fa00, 0x0c0bfa00, 0xf3f3fa00, 0x0c05fa00, 0xf3f9fa00, + 0x060bfa00, 0x00000006, 0x06060006, 0xf9fa0006, 0x06000006, 0xfa000006, 0x00060006, 0xfffa0006, + 0x0c0c0006, 0xf3f40006, 0x0c060006, 0xf3fa0006, 0x060c0006, 0xfffffffa, 0x0605fffa, 0xf9f9fffa, + 0x05fffffa, 0xf9fffffa, 0x0005fffa, 0xfff9fffa, 0x0c0bfffa, 0xf3f3fffa, 0x0c05fffa, 0xf3f9fffa, + 0x060bfffa, 0x00000c0c, 0x06060c0c, 0xf9fa0c0c, 0x06000c0c, 0xfa000c0c, 0x00060c0c, 0xfffa0c0c, + 
0x0c0c0c0c, 0xf3f40c0c, 0x0c060c0c, 0xf3fa0c0c, 0x060c0c0c, 0xfffff3f4, 0x0605f3f4, 0xf9f9f3f4, + 0x05fff3f4, 0xf9fff3f4, 0x0005f3f4, 0xfff9f3f4, 0x0c0bf3f4, 0xf3f3f3f4, 0x0c05f3f4, 0xf3f9f3f4, + 0x060bf3f4, 0x00000c06, 0x06060c06, 0xf9fa0c06, 0x06000c06, 0xfa000c06, 0x00060c06, 0xfffa0c06, + 0x0c0c0c06, 0xf3f40c06, 0x0c060c06, 0xf3fa0c06, 0x060c0c06, 0xfffff3fa, 0x0605f3fa, 0xf9f9f3fa, + 0x05fff3fa, 0xf9fff3fa, 0x0005f3fa, 0xfff9f3fa, 0x0c0bf3fa, 0xf3f3f3fa, 0x0c05f3fa, 0xf3f9f3fa, + 0x060bf3fa, 0x0000060c, 0x0606060c, 0xf9fa060c, 0x0600060c, 0xfa00060c, 0x0006060c, 0xfffa060c, + 0x0c0c060c, 0xf3f4060c, 0x0c06060c, 0xf3fa060c, 0x060c060c, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000707, 0xfffff8f9, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x00000e0e, + 0xfffff1f2, 0x00001507, 0xffffeaf9, 0x00000715, 0xfffff8eb, 0x00000df2, 0xfffff20e, 0x000014f9, + 0xffffeb07, 0xfffff915, 0x000006eb, 0x00001c0e, 0xffffe3f2, 0x00000e1c, 0xfffff1e4, 0x00001c1c, + 0xffffe3e4, 0x00002300, 0xffffdd00, 0x00000023, 0xffffffdd, 0x00001beb, 0xffffe415, 0xffffeb1c, + 0x000014e4, 0x00003823, 0xffffc7dd, 0x00002338, 0xffffdcc8, 0x000029f2, 0xffffd60e, 0xfffff22a, + 0x00000dd6, 0x0000380e, 0xffffc7f2, 0x00000e38, 0xfffff1c8, 0x00003838, 0xffffc7c8, 0x00003f00, + 0xffffc100, 0x0000003f, 0xffffffc1, 0x000029d6, 0xffffd62a, 0x000037e4, 0xffffc81c, 0xffffe438, + 0x00001bc8, 0x00005b23, 0xffffa4dd, 0x0000235b, 0xffffdca5, 0x0000623f, 0xffff9dc1, 0x00003f62, + 0xffffc09e, 0x00004ceb, 0xffffb315, 0xffffeb4d, 0x000014b3, 0x0000620e, 0xffff9df2, 0x00000e62, + 0xfffff19e, 0x00006969, 0xffff9697, 0x000076f9, 0xffff8907, 0xfffff977, 0x00000689, 0x00004cc1, + 0xffffb33f, 0xffffc14d, 0x00003eb3, 0x000068cf, 0xffff9731, 0xffffcf69, 0x00003097, 0x00007689, + 0xffff8977, 0x00002a2a, 0xffffd5d6, 0x00004d4d, 0xffffb2b3, 0x00000000, 0x07070000, 0xf8f90000, + 0x07000000, 0xf9000000, 0x00070000, 
0xfff90000, 0x0e0e0000, 0xf1f20000, 0x15070000, 0xeaf90000, + 0x07150000, 0x00000707, 0x07070707, 0xf8f90707, 0x07000707, 0xf9000707, 0x00070707, 0xfff90707, + 0x0e0e0707, 0xf1f20707, 0x15070707, 0xeaf90707, 0x07150707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9, + 0x06fff8f9, 0xf8fff8f9, 0x0006f8f9, 0xfff8f8f9, 0x0e0df8f9, 0xf1f1f8f9, 0x1506f8f9, 0xeaf8f8f9, + 0x0714f8f9, 0x00000700, 0x07070700, 0xf8f90700, 0x07000700, 0xf9000700, 0x00070700, 0xfff90700, + 0x0e0e0700, 0xf1f20700, 0x15070700, 0xeaf90700, 0x07150700, 0xfffff900, 0x0706f900, 0xf8f8f900, + 0x06fff900, 0xf8fff900, 0x0006f900, 0xfff8f900, 0x0e0df900, 0xf1f1f900, 0x1506f900, 0xeaf8f900, + 0x0714f900, 0x00000007, 0x07070007, 0xf8f90007, 0x07000007, 0xf9000007, 0x00070007, 0xfff90007, + 0x0e0e0007, 0xf1f20007, 0x15070007, 0xeaf90007, 0x07150007, 0xfffffff9, 0x0706fff9, 0xf8f8fff9, + 0x06fffff9, 0xf8fffff9, 0x0006fff9, 0xfff8fff9, 0x0e0dfff9, 0xf1f1fff9, 0x1506fff9, 0xeaf8fff9, + 0x0714fff9, 0x00000e0e, 0x07070e0e, 0xf8f90e0e, 0x07000e0e, 0xf9000e0e, 0x00070e0e, 0xfff90e0e, + 0x0e0e0e0e, 0xf1f20e0e, 0x15070e0e, 0xeaf90e0e, 0x07150e0e, 0xfffff1f2, 0x0706f1f2, 0xf8f8f1f2, + 0x06fff1f2, 0xf8fff1f2, 0x0006f1f2, 0xfff8f1f2, 0x0e0df1f2, 0xf1f1f1f2, 0x1506f1f2, 0xeaf8f1f2, + 0x0714f1f2, 0x00001507, 0x07071507, 0xf8f91507, 0x07001507, 0xf9001507, 0x00071507, 0xfff91507, + 0x0e0e1507, 0xf1f21507, 0x15071507, 0xeaf91507, 0x07151507, 0xffffeaf9, 0x0706eaf9, 0xf8f8eaf9, + 0x06ffeaf9, 0xf8ffeaf9, 0x0006eaf9, 0xfff8eaf9, 0x0e0deaf9, 0xf1f1eaf9, 0x1506eaf9, 0xeaf8eaf9, + 0x0714eaf9, 0x00000715, 0x07070715, 0xf8f90715, 0x07000715, 0xf9000715, 0x00070715, 0xfff90715, + 0x0e0e0715, 0xf1f20715, 0x15070715, 0xeaf90715, 0x07150715, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000808, 0xfffff7f8, 0x00000800, 0xfffff800, 0x00000008, 
0xfffffff8, 0x00001010, + 0xffffeff0, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00000ff0, 0xfffff010, 0x000017f8, + 0xffffe808, 0xfffff818, 0x000007e8, 0x00002010, 0xffffdff0, 0x00001020, 0xffffefe0, 0x00002020, + 0xffffdfe0, 0x00002800, 0xffffd800, 0x00000028, 0xffffffd8, 0x00001fe8, 0xffffe018, 0xffffe820, + 0x000017e0, 0x00004028, 0xffffbfd8, 0x00002840, 0xffffd7c0, 0x00002ff0, 0xffffd010, 0xfffff030, + 0x00000fd0, 0x00004010, 0xffffbff0, 0x00001040, 0xffffefc0, 0x00004040, 0xffffbfc0, 0x00004800, + 0xffffb800, 0x00000048, 0xffffffb8, 0x00002fd0, 0xffffd030, 0x00003fe0, 0xffffc020, 0xffffe040, + 0x00001fc0, 0x00006828, 0xffff97d8, 0x00002868, 0xffffd798, 0x00007048, 0xffff8fb8, 0x00004870, + 0xffffb790, 0x000057e8, 0xffffa818, 0xffffe858, 0x000017a8, 0x00007010, 0xffff8ff0, 0x00001070, + 0xffffef90, 0x00007878, 0xffff8788, 0x000057b8, 0xffffa848, 0xffffb858, 0x000047a8, 0x000077c8, + 0xffff8838, 0xffffc878, 0x00003788, 0x00003030, 0xffffcfd0, 0x00005858, 0xffffa7a8, 0x00000000, + 0x08080000, 0xf7f80000, 0x08000000, 0xf8000000, 0x00080000, 0xfff80000, 0x10100000, 0xeff00000, + 0x10080000, 0xeff80000, 0x08100000, 0x00000808, 0x08080808, 0xf7f80808, 0x08000808, 0xf8000808, + 0x00080808, 0xfff80808, 0x10100808, 0xeff00808, 0x10080808, 0xeff80808, 0x08100808, 0xfffff7f8, + 0x0807f7f8, 0xf7f7f7f8, 0x07fff7f8, 0xf7fff7f8, 0x0007f7f8, 0xfff7f7f8, 0x100ff7f8, 0xefeff7f8, + 0x1007f7f8, 0xeff7f7f8, 0x080ff7f8, 0x00000800, 0x08080800, 0xf7f80800, 0x08000800, 0xf8000800, + 0x00080800, 0xfff80800, 0x10100800, 0xeff00800, 0x10080800, 0xeff80800, 0x08100800, 0xfffff800, + 0x0807f800, 0xf7f7f800, 0x07fff800, 0xf7fff800, 0x0007f800, 0xfff7f800, 0x100ff800, 0xefeff800, + 0x1007f800, 0xeff7f800, 0x080ff800, 0x00000008, 0x08080008, 0xf7f80008, 0x08000008, 0xf8000008, + 0x00080008, 0xfff80008, 0x10100008, 0xeff00008, 0x10080008, 0xeff80008, 0x08100008, 0xfffffff8, + 0x0807fff8, 0xf7f7fff8, 0x07fffff8, 0xf7fffff8, 0x0007fff8, 0xfff7fff8, 0x100ffff8, 0xefeffff8, + 0x1007fff8, 
0xeff7fff8, 0x080ffff8, 0x00001010, 0x08081010, 0xf7f81010, 0x08001010, 0xf8001010, + 0x00081010, 0xfff81010, 0x10101010, 0xeff01010, 0x10081010, 0xeff81010, 0x08101010, 0xffffeff0, + 0x0807eff0, 0xf7f7eff0, 0x07ffeff0, 0xf7ffeff0, 0x0007eff0, 0xfff7eff0, 0x100feff0, 0xefefeff0, + 0x1007eff0, 0xeff7eff0, 0x080feff0, 0x00001008, 0x08081008, 0xf7f81008, 0x08001008, 0xf8001008, + 0x00081008, 0xfff81008, 0x10101008, 0xeff01008, 0x10081008, 0xeff81008, 0x08101008, 0xffffeff8, + 0x0807eff8, 0xf7f7eff8, 0x07ffeff8, 0xf7ffeff8, 0x0007eff8, 0xfff7eff8, 0x100feff8, 0xefefeff8, + 0x1007eff8, 0xeff7eff8, 0x080feff8, 0x00000810, 0x08080810, 0xf7f80810, 0x08000810, 0xf8000810, + 0x00080810, 0xfff80810, 0x10100810, 0xeff00810, 0x10080810, 0xeff80810, 0x08100810, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000909, 0xfffff6f7, 0x00000900, 0xfffff700, 0x00000009, 0xfffffff7, 0x00001212, + 0xffffedee, 0x00001b09, 0xffffe4f7, 0x0000091b, 0xfffff6e5, 0x000011ee, 0xffffee12, 0x00001af7, + 0xffffe509, 0xfffff71b, 0x000008e5, 0x00002412, 0xffffdbee, 0x00001224, 0xffffeddc, 0x00002424, + 0xffffdbdc, 0x00002d00, 0xffffd300, 0x0000002d, 0xffffffd3, 0x000023e5, 0xffffdc1b, 0xffffe524, + 0x00001adc, 0x0000482d, 0xffffb7d3, 0x00002d48, 0xffffd2b8, 0x000035ee, 0xffffca12, 0xffffee36, + 0x000011ca, 0x00004812, 0xffffb7ee, 0x00001248, 0xffffedb8, 0x00004848, 0xffffb7b8, 0x00005100, + 0xffffaf00, 0x00000051, 0xffffffaf, 0x000035ca, 0xffffca36, 0x000047dc, 0xffffb824, 0xffffdc48, + 0x000023b8, 0x0000752d, 0xffff8ad3, 0x00002d75, 0xffffd28b, 0x00007e51, 0xffff81af, 0x0000517e, + 0xffffae82, 0x000062e5, 0xffff9d1b, 0xffffe563, 0x00001a9d, 0x000062af, 0xffff9d51, 0xffffaf63, + 0x0000509d, 0x00003636, 0xffffc9ca, 0x00006c6c, 
0xffff9394, 0x00000000, 0x09090000, 0xf6f70000, + 0x09000000, 0xf7000000, 0x00090000, 0xfff70000, 0x12120000, 0xedee0000, 0x1b090000, 0xe4f70000, + 0x091b0000, 0xf6e50000, 0x00000909, 0x09090909, 0xf6f70909, 0x09000909, 0xf7000909, 0x00090909, + 0xfff70909, 0x12120909, 0xedee0909, 0x1b090909, 0xe4f70909, 0x091b0909, 0xf6e50909, 0xfffff6f7, + 0x0908f6f7, 0xf6f6f6f7, 0x08fff6f7, 0xf6fff6f7, 0x0008f6f7, 0xfff6f6f7, 0x1211f6f7, 0xededf6f7, + 0x1b08f6f7, 0xe4f6f6f7, 0x091af6f7, 0xf6e4f6f7, 0x00000900, 0x09090900, 0xf6f70900, 0x09000900, + 0xf7000900, 0x00090900, 0xfff70900, 0x12120900, 0xedee0900, 0x1b090900, 0xe4f70900, 0x091b0900, + 0xf6e50900, 0xfffff700, 0x0908f700, 0xf6f6f700, 0x08fff700, 0xf6fff700, 0x0008f700, 0xfff6f700, + 0x1211f700, 0xededf700, 0x1b08f700, 0xe4f6f700, 0x091af700, 0xf6e4f700, 0x00000009, 0x09090009, + 0xf6f70009, 0x09000009, 0xf7000009, 0x00090009, 0xfff70009, 0x12120009, 0xedee0009, 0x1b090009, + 0xe4f70009, 0x091b0009, 0xf6e50009, 0xfffffff7, 0x0908fff7, 0xf6f6fff7, 0x08fffff7, 0xf6fffff7, + 0x0008fff7, 0xfff6fff7, 0x1211fff7, 0xededfff7, 0x1b08fff7, 0xe4f6fff7, 0x091afff7, 0xf6e4fff7, + 0x00001212, 0x09091212, 0xf6f71212, 0x09001212, 0xf7001212, 0x00091212, 0xfff71212, 0x12121212, + 0xedee1212, 0x1b091212, 0xe4f71212, 0x091b1212, 0xf6e51212, 0xffffedee, 0x0908edee, 0xf6f6edee, + 0x08ffedee, 0xf6ffedee, 0x0008edee, 0xfff6edee, 0x1211edee, 0xedededee, 0x1b08edee, 0xe4f6edee, + 0x091aedee, 0xf6e4edee, 0x00001b09, 0x09091b09, 0xf6f71b09, 0x09001b09, 0xf7001b09, 0x00091b09, + 0xfff71b09, 0x12121b09, 0xedee1b09, 0x1b091b09, 0xe4f71b09, 0x091b1b09, 0xf6e51b09, 0xffffe4f7, + 0x0908e4f7, 0xf6f6e4f7, 0x08ffe4f7, 0xf6ffe4f7, 0x0008e4f7, 0xfff6e4f7, 0x1211e4f7, 0xedede4f7, + 0x1b08e4f7, 0xe4f6e4f7, 0x091ae4f7, 0xf6e4e4f7, 0x0000091b, 0x0909091b, 0xf6f7091b, 0x0900091b, + 0xf700091b, 0x0009091b, 0xfff7091b, 0x1212091b, 0xedee091b, 0x1b09091b, 0xe4f7091b, 0x091b091b, + 0xf6e5091b, 0xfffff6e5, 0x0908f6e5, 0xf6f6f6e5, 0x08fff6e5, 0xf6fff6e5, 0x0008f6e5, 
0xfff6f6e5, + 0x1211f6e5, 0xededf6e5, 0x1b08f6e5, 0xe4f6f6e5, 0x091af6e5, 0xf6e4f6e5, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, + 0xfffff9fa, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x000004fb, 0xfffffb05, 0xfffffb05, + 0x000004fb, 0x00000b06, 0xfffff4fa, 0x0000060b, 0xfffff9f5, 0x00000800, 0xfffff800, 0x00000008, + 0xfffffff8, 0x00000b0b, 0xfffff4f5, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x0000110c, + 0xffffeef4, 0x00000c11, 0xfffff3ef, 0x00001111, 0xffffeeef, 0x00001206, 0xffffedfa, 0x00000612, + 0xfffff9ee, 0x00000af8, 0xfffff508, 0xfffff80b, 0x000007f5, 0x00000f00, 0xfffff100, 0x0000000f, + 0xfffffff1, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00001912, 0xffffe6ee, 0x00001219, + 0xffffede7, 0x0000190b, 0xffffe6f5, 0x00000b19, 0xfffff4e7, 0x00001919, 0xffffe6e7, 0x00000df2, + 0xfffff20e, 0xfffff20e, 0x00000df2, 0x00001a00, 0xffffe600, 0x0000001a, 0xffffffe6, 0x000011f5, + 0xffffee0b, 0xfffff512, 0x00000aee, 0x000015f9, 0xffffea07, 0xfffff916, 0x000006ea, 0x0000221a, + 0xffffdde6, 0x00001a22, 0xffffe5de, 0x00002212, 0xffffddee, 0x00001222, 0xffffedde, 0x00002222, + 0xffffddde, 0x0000230b, 0xffffdcf5, 0x00000b23, 0xfffff4dd, 0x00001d00, 0xffffe300, 0x0000001d, + 0xffffffe3, 0x000015ed, 0xffffea13, 0xffffed16, 0x000012ea, 0x000019f1, 0xffffe60f, 0xfffff11a, + 0x00000ee6, 0x00002500, 0xffffdb00, 0x00000025, 0xffffffdb, 0x00002c1b, 0xffffd3e5, 0x00001b2c, + 0xffffe4d4, 0x00002c24, 0xffffd3dc, 0x0000242c, 0xffffdbd4, 0x00002c12, 0xffffd3ee, 0x0000122c, + 0xffffedd4, 0x000020f6, 0xffffdf0a, 0xfffff621, 0x000009df, 0x00002d2d, 0xffffd2d3, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, + 0xfffff9fa, 0x00000700, 0xfffff900, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020300, 0x0201fd00, + 0x02020003, 0x0201fffd, 
0x02020606, 0x0201f9fa, 0x02020700, 0x0201f900, 0xfdfe0000, 0xfdfe0202, + 0xfdfdfdfe, 0xfdfe0300, 0xfdfdfd00, 0xfdfe0003, 0xfdfdfffd, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0700, + 0xfdfdf900, 0x03000000, 0x03000202, 0x02fffdfe, 0x03000300, 0x02fffd00, 0x03000003, 0x02fffffd, + 0x03000606, 0x02fff9fa, 0x03000700, 0x02fff900, 0xfd000000, 0xfd000202, 0xfcfffdfe, 0xfd000300, + 0xfcfffd00, 0xfd000003, 0xfcfffffd, 0xfd000606, 0xfcfff9fa, 0xfd000700, 0xfcfff900, 0x00030000, + 0x00030202, 0x0002fdfe, 0x00030300, 0x0002fd00, 0x00030003, 0x0002fffd, 0x00030606, 0x0002f9fa, + 0x00030700, 0x0002f900, 0xfffd0000, 0xfffd0202, 0xfffcfdfe, 0xfffd0300, 0xfffcfd00, 0xfffd0003, + 0xfffcfffd, 0xfffd0606, 0xfffcf9fa, 0xfffd0700, 0xfffcf900, 0x06060000, 0x06060202, 0x0605fdfe, + 0x06060300, 0x0605fd00, 0x06060003, 0x0605fffd, 0x06060606, 0x0605f9fa, 0x06060700, 0x0605f900, + 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0300, 0xf9f9fd00, 0xf9fa0003, 0xf9f9fffd, 0xf9fa0606, + 0xf9f9f9fa, 0xf9fa0700, 0xf9f9f900, 0x07000000, 0x07000202, 0x06fffdfe, 0x07000300, 0x06fffd00, + 0x07000003, 0x06fffffd, 0x07000606, 0x06fff9fa, 0x07000700, 0x06fff900, 0xf9000000, 0xf9000202, + 0xf8fffdfe, 0xf9000300, 0xf8fffd00, 0xf9000003, 0xf8fffffd, 0xf9000606, 0xf8fff9fa, 0xf9000700, + 0xf8fff900, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, + 0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x000003fc, 0xfffffc04, 0xfffffa0a, + 0x000005f6, 0xfffff400, 0x00000c00, 0xfffff3fa, 0xfffff406, 0x00000bfa, 0x00000c06, 0xfffffff2, + 0x0000000e, 0x00000c0c, 0xfffff3f4, 0xffffee00, 0x00001200, 0xfffff40e, 0x00000bf2, 0xfffff9ee, + 0xfffffa12, 0x000005ee, 0x00000612, 0xffffedf6, 0xffffee0a, 0x000011f6, 0x0000120a, 0xffffffea, + 0x00000016, 0xffffe800, 0x00001800, 0xfffff3ea, 0xfffff416, 0x00000bea, 0x00000c16, 0xffffe7f8, + 0xffffe808, 0x000017f8, 0x00001808, 0xfffff9e6, 0xfffffa1a, 
0x000005e6, 0x0000061a, 0xffffffe4, + 0x0000001c, 0x00001414, 0xffffebec, 0xffffe5f2, 0x00001a0e, 0xfffff3e2, 0x00000c1e, 0xffffdff6, + 0x0000200a, 0xffffdfee, 0x00002012, 0xffffe5e6, 0x00001a1a, 0xffffebde, 0x00001422, 0xfffff3da, + 0x00000c26, 0xffffdfe0, 0x00002020, 0x00002020, 0xffffd7ea, 0xffffddde, 0x00002222, 0x00000000, + 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, + 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002, + 0x01fffffe, 0x02000202, 0x01fffdfe, 0x02000606, 0x01fff9fa, 0x02000600, 0x01fffa00, 0x02000006, + 0x01fffffa, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000202, 0xfdfffdfe, + 0xfe000606, 0xfdfff9fa, 0xfe000600, 0xfdfffa00, 0xfe000006, 0xfdfffffa, 0x00020000, 0x00020200, + 0x0001fe00, 0x00020002, 0x0001fffe, 0x00020202, 0x0001fdfe, 0x00020606, 0x0001f9fa, 0x00020600, + 0x0001fa00, 0x00020006, 0x0001fffa, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe, + 0xfffe0202, 0xfffdfdfe, 0xfffe0606, 0xfffdf9fa, 0xfffe0600, 0xfffdfa00, 0xfffe0006, 0xfffdfffa, + 0x02020000, 0x02020200, 0x0201fe00, 0x02020002, 0x0201fffe, 0x02020202, 0x0201fdfe, 0x02020606, + 0x0201f9fa, 0x02020600, 0x0201fa00, 0x02020006, 0x0201fffa, 0xfdfe0000, 0xfdfe0200, 0xfdfdfe00, + 0xfdfe0002, 0xfdfdfffe, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0600, 0xfdfdfa00, + 0xfdfe0006, 0xfdfdfffa, 0x06060000, 0x06060200, 0x0605fe00, 0x06060002, 0x0605fffe, 0x06060202, + 0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060600, 0x0605fa00, 0x06060006, 0x0605fffa, 0xf9fa0000, + 0xf9fa0200, 0xf9f9fe00, 0xf9fa0002, 0xf9f9fffe, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, + 0xf9fa0600, 0xf9f9fa00, 0xf9fa0006, 0xf9f9fffa, 0x06000000, 0x06000200, 0x05fffe00, 0x06000002, + 0x05fffffe, 0x06000202, 0x05fffdfe, 0x06000606, 0x05fff9fa, 0x06000600, 0x05fffa00, 0x06000006, + 0x05fffffa, 0xfa000000, 0xfa000200, 0xf9fffe00, 0xfa000002, 0xf9fffffe, 0xfa000202, 0xf9fffdfe, + 
0xfa000606, 0xf9fff9fa, 0xfa000600, 0xf9fffa00, 0xfa000006, 0xf9fffffa, 0x00060000, 0x00060200, + 0x0005fe00, 0x00060002, 0x0005fffe, 0x00060202, 0x0005fdfe, 0x00060606, 0x0005f9fa, 0x00060600, + 0x0005fa00, 0x00060006, 0x0005fffa, 0xfffa0000, 0xfffa0200, 0xfff9fe00, 0xfffa0002, 0xfff9fffe, + 0xfffa0202, 0xfff9fdfe, 0xfffa0606, 0xfff9f9fa, 0xfffa0600, 0xfff9fa00, 0xfffa0006, 0xfff9fffa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, + 0xfffff5f6, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000005fa, 0xfffffa06, 0xfffff80e, + 0x000007f2, 0xffffffee, 0x00000012, 0xfffff00a, 0x00000ff6, 0xffffe800, 0x00001800, 0xfffff7e8, + 0xfffff818, 0x000007e8, 0x00000818, 0x00001212, 0xffffedee, 0xfffff014, 0x00000fec, 0xffffe5f2, + 0xffffe60e, 0x000019f2, 0x00001a0e, 0xffffffe2, 0x0000001e, 0xffffde00, 0x00002200, 0xfffff7de, + 0xfffff822, 0x000007de, 0x00000822, 0xffffede2, 0xffffee1e, 0x000011e2, 0x0000121e, 0xffffddf6, + 0xffffde0a, 0x000021f6, 0x0000220a, 0xffffddec, 0x00002214, 0xffffffd8, 0x00000028, 0x00001e1e, + 0xffffe1e2, 0xffffedd8, 0x00001228, 0xffffd400, 0x00002c00, 0xffffd3f0, 0x00002c10, 0xffffdbdc, + 0xffffdbdc, 0x00002424, 0xffffd3e6, 0x00002c1a, 0xffffe5d2, 0x00001a2e, 0xffffedcc, 0x00001234, + 0xffffc9ec, 0xffffd3d4, 0x00002c2c, 0xffffc9e0, 0xffffd1d2, 0xffffd1d2, 0x00002e2e, 0x00000000, + 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, 0xfffff5f6, + 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002, + 0x01fffffe, 0x02000404, 0x01fffbfc, 0x02000a0a, 0x01fff5f6, 0x02000a00, 0x01fff600, 0x0200000a, + 0x01fffff6, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000404, 0xfdfffbfc, + 0xfe000a0a, 0xfdfff5f6, 0xfe000a00, 0xfdfff600, 0xfe00000a, 0xfdfffff6, 0x00020000, 0x00020200, + 0x0001fe00, 0x00020002, 0x0001fffe, 
0x00020404, 0x0001fbfc, 0x00020a0a, 0x0001f5f6, 0x00020a00, + 0x0001f600, 0x0002000a, 0x0001fff6, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe, + 0xfffe0404, 0xfffdfbfc, 0xfffe0a0a, 0xfffdf5f6, 0xfffe0a00, 0xfffdf600, 0xfffe000a, 0xfffdfff6, + 0x04040000, 0x04040200, 0x0403fe00, 0x04040002, 0x0403fffe, 0x04040404, 0x0403fbfc, 0x04040a0a, + 0x0403f5f6, 0x04040a00, 0x0403f600, 0x0404000a, 0x0403fff6, 0xfbfc0000, 0xfbfc0200, 0xfbfbfe00, + 0xfbfc0002, 0xfbfbfffe, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0a0a, 0xfbfbf5f6, 0xfbfc0a00, 0xfbfbf600, + 0xfbfc000a, 0xfbfbfff6, 0x0a0a0000, 0x0a0a0200, 0x0a09fe00, 0x0a0a0002, 0x0a09fffe, 0x0a0a0404, + 0x0a09fbfc, 0x0a0a0a0a, 0x0a09f5f6, 0x0a0a0a00, 0x0a09f600, 0x0a0a000a, 0x0a09fff6, 0xf5f60000, + 0xf5f60200, 0xf5f5fe00, 0xf5f60002, 0xf5f5fffe, 0xf5f60404, 0xf5f5fbfc, 0xf5f60a0a, 0xf5f5f5f6, + 0xf5f60a00, 0xf5f5f600, 0xf5f6000a, 0xf5f5fff6, 0x0a000000, 0x0a000200, 0x09fffe00, 0x0a000002, + 0x09fffffe, 0x0a000404, 0x09fffbfc, 0x0a000a0a, 0x09fff5f6, 0x0a000a00, 0x09fff600, 0x0a00000a, + 0x09fffff6, 0xf6000000, 0xf6000200, 0xf5fffe00, 0xf6000002, 0xf5fffffe, 0xf6000404, 0xf5fffbfc, + 0xf6000a0a, 0xf5fff5f6, 0xf6000a00, 0xf5fff600, 0xf600000a, 0xf5fffff6, 0x000a0000, 0x000a0200, + 0x0009fe00, 0x000a0002, 0x0009fffe, 0x000a0404, 0x0009fbfc, 0x000a0a0a, 0x0009f5f6, 0x000a0a00, + 0x0009f600, 0x000a000a, 0x0009fff6, 0xfff60000, 0xfff60200, 0xfff5fe00, 0xfff60002, 0xfff5fffe, + 0xfff60404, 0xfff5fbfc, 0xfff60a0a, 0xfff5f5f6, 0xfff60a00, 0xfff5f600, 0xfff6000a, 0xfff5fff6, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, + 0xfffff3f4, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x000007f8, 0xfffff808, 0xfffff008, + 0x00000ff8, 0xffffe800, 0x00001800, 0xfffff7e8, 0xfffff818, 0x000007e8, 0x00000818, 0xfffff014, + 0x00000fec, 0xffffffe4, 0x0000001c, 0xffffe7f0, 0xffffe810, 0x000017f0, 
0x00001810, 0xffffe000, + 0x00002000, 0xffffefe4, 0xfffff01c, 0x00000fe4, 0x0000101c, 0xffffdff8, 0xffffe008, 0xfffff7e0, + 0xfffff820, 0x000007e0, 0x00000820, 0x00001ff8, 0x00002008, 0x00001818, 0xffffe7e8, 0xffffe818, + 0x000017e8, 0xffffdfec, 0x00002014, 0xffffffd8, 0x00000028, 0xffffefd8, 0x00001028, 0xffffd400, + 0xffffd400, 0xffffffd4, 0x0000002c, 0x00002c00, 0x00002c00, 0xffffdfe0, 0x00002020, 0xffffd3f0, + 0x00002c10, 0xffffd3e8, 0xffffe7d4, 0x0000182c, 0x00002c18, 0xffffefd0, 0x00001030, 0xffffdbdc, + 0xffffdbdc, 0x00002424, 0x00002424, 0xffffcbec, 0x00002828, 0xffffd7d8, 0xffffcbe0, 0x00000000, + 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, 0xfffff3f4, + 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x04000000, 0x04000400, 0x03fffc00, 0x04000004, + 0x03fffffc, 0x04000404, 0x03fffbfc, 0x04000c0c, 0x03fff3f4, 0x04000c00, 0x03fff400, 0x0400000c, + 0x03fffff4, 0xfc000000, 0xfc000400, 0xfbfffc00, 0xfc000004, 0xfbfffffc, 0xfc000404, 0xfbfffbfc, + 0xfc000c0c, 0xfbfff3f4, 0xfc000c00, 0xfbfff400, 0xfc00000c, 0xfbfffff4, 0x00040000, 0x00040400, + 0x0003fc00, 0x00040004, 0x0003fffc, 0x00040404, 0x0003fbfc, 0x00040c0c, 0x0003f3f4, 0x00040c00, + 0x0003f400, 0x0004000c, 0x0003fff4, 0xfffc0000, 0xfffc0400, 0xfffbfc00, 0xfffc0004, 0xfffbfffc, + 0xfffc0404, 0xfffbfbfc, 0xfffc0c0c, 0xfffbf3f4, 0xfffc0c00, 0xfffbf400, 0xfffc000c, 0xfffbfff4, + 0x04040000, 0x04040400, 0x0403fc00, 0x04040004, 0x0403fffc, 0x04040404, 0x0403fbfc, 0x04040c0c, + 0x0403f3f4, 0x04040c00, 0x0403f400, 0x0404000c, 0x0403fff4, 0xfbfc0000, 0xfbfc0400, 0xfbfbfc00, + 0xfbfc0004, 0xfbfbfffc, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0c0c, 0xfbfbf3f4, 0xfbfc0c00, 0xfbfbf400, + 0xfbfc000c, 0xfbfbfff4, 0x0c0c0000, 0x0c0c0400, 0x0c0bfc00, 0x0c0c0004, 0x0c0bfffc, 0x0c0c0404, + 0x0c0bfbfc, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c0c00, 0x0c0bf400, 0x0c0c000c, 0x0c0bfff4, 0xf3f40000, + 0xf3f40400, 0xf3f3fc00, 0xf3f40004, 0xf3f3fffc, 0xf3f40404, 0xf3f3fbfc, 0xf3f40c0c, 0xf3f3f3f4, + 0xf3f40c00, 
0xf3f3f400, 0xf3f4000c, 0xf3f3fff4, 0x0c000000, 0x0c000400, 0x0bfffc00, 0x0c000004, + 0x0bfffffc, 0x0c000404, 0x0bfffbfc, 0x0c000c0c, 0x0bfff3f4, 0x0c000c00, 0x0bfff400, 0x0c00000c, + 0x0bfffff4, 0xf4000000, 0xf4000400, 0xf3fffc00, 0xf4000004, 0xf3fffffc, 0xf4000404, 0xf3fffbfc, + 0xf4000c0c, 0xf3fff3f4, 0xf4000c00, 0xf3fff400, 0xf400000c, 0xf3fffff4, 0x000c0000, 0x000c0400, + 0x000bfc00, 0x000c0004, 0x000bfffc, 0x000c0404, 0x000bfbfc, 0x000c0c0c, 0x000bf3f4, 0x000c0c00, + 0x000bf400, 0x000c000c, 0x000bfff4, 0xfff40000, 0xfff40400, 0xfff3fc00, 0xfff40004, 0xfff3fffc, + 0xfff40404, 0xfff3fbfc, 0xfff40c0c, 0xfff3f3f4, 0xfff40c00, 0xfff3f400, 0xfff4000c, 0xfff3fff4, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, + 0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, + 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, + 0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 
0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, + 0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, + 0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, + 0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, + 0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, + 0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, + 0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, + 0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, + 0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, + 0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, + 0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, + 0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, + 0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, + 0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, + 0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, + 0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, + 0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, + 0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, + 0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 
0x00001414, + 0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, + 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, + 0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, + 0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, + 0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, + 0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, + 0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, + 0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, + 0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, + 0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, + 0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, + 0xf3f42e2e, 0xf3f3d1d2, 
0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, + 0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, + 0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, + 0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, + 0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, + 0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, + 0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, + 0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, + 0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, + 0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, + 0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, + 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, + 0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, + 0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, + 0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, + 0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, + 0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, + 0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, + 0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, + 0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, + 0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, + 0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, + 0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, + 0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, + 0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, + 0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, + 0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, + 0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, + 0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, + 0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, + 
0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, + 0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, + 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, + 0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, + 0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, + 0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, + 0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, + 0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, + 0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, + 0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 
0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, + 0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, + 0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, + 0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, + 0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, + 0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, + 0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, + 0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, + 0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, + 0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, + 0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, + 0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, + 0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + + +static const uint32_t correctionloworder[] = { + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x04040404, + 0xfbfbfbfc, 0x05050101, 0xfafafeff, 0x01010505, 0xfefefafb, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe, + 0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x09090404, 0xf6f6fbfc, 0x04040909, 0xfbfbf6f7, 0x09090909, + 0xf6f6f6f7, 0x0a0a0101, 0xf5f5feff, 0x01010a0a, 0xfefef5f6, 0x0807fafb, 0xf7f80505, 0xfafb0808, + 0x0504f7f8, 0x0f0f0909, 0xf0f0f6f7, 0x09090f0f, 0xf6f6f0f1, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, + 0x0302f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000, + 0xedee0000, 
0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff6f7, 0xeff00909, 0xf6f71010, + 0x0908eff0, 0x1b1b0b0b, 0xe4e4f4f5, 0x0b0b1b1b, 0xf4f4e4e5, 0x1c1c1313, 0xe3e3eced, 0x13131c1c, + 0xecece3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1d1d0404, 0xe2e2fbfc, 0x04041d1d, + 0xfbfbe2e3, 0x1e1e1e1e, 0xe1e1e1e2, 0x2120fdfe, 0xdedf0202, 0xfdfe2121, 0x0201dedf, 0x1716edee, + 0xe8e91212, 0xedee1717, 0x1211e8e9, 0x1e1df0f1, 0xe1e20f0f, 0xf0f11e1e, 0x0f0ee1e2, 0x2e2e1616, + 0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31312323, + 0xcecedcdd, 0x23233131, 0xdcdccecf, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, 0x0b0ad6d7, 0x33330404, + 0xccccfbfc, 0x04043333, 0xfbfbcccd, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e2e3, + 0xd5d61d1d, 0xe2e32a2a, 0x1d1cd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1b1b, + 0xb3b3e4e5, 0x1b1b4c4c, 0xe4e4b3b4, 0x4d4d2b2b, 0xb2b2d4d5, 0x2b2b4d4d, 0xd4d4b2b3, 0x3736e7e8, + 0xc8c91818, 0xe7e83737, 0x1817c8c9, 0x4f4f0e0e, 0xb0b0f1f2, 0x0e0e4f4f, 0xf1f1b0b1, 0x53533f3f, + 0xacacc0c1, 0x3f3f5353, 0xc0c0acad, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202, + 0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5d5d5d5d, 0xa2a2a2a3, 0x3d3ccbcc, 0xc2c33434, 0xcbcc3d3d, + 0x3433c2c3, 0x78783434, 0x8787cbcc, 0x34347878, 0xcbcb8788, 0x4b4ad2d3, 0xb4b52d2d, 0xd2d34b4b, + 0x2d2cb4b5, 0x7d7d4b4b, 0x8282b4b5, 0x4b4b7d7d, 0xb4b48283, 0x7a7a2121, 0x8585dedf, 0x21217a7a, + 0xdede8586, 0x6766f2f3, 0x98990d0d, 0xf2f36767, 0x0d0c9899, 0x605fd7d8, 0x9fa02828, 0xd7d86060, + 0x28279fa0, 0x7f7eddde, 0x80812222, 0xddde7f7f, 0x22218081, 0x5958a6a7, 0xa6a75959, 0x6968b1b2, + 0x96974e4e, 0xb1b26969, 0x4e4d9697, 0x0c0c0c0c, 0xf3f3f3f4, 0x17171717, 0xe8e8e8e9, 0x2a2a2a2a, + 0xd5d5d5d6, 0x49494949, 0xb6b6b6b7, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0xfcfd0101, + 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfeff0303, 0xfeff0303, + 0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, + 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, + 0xf8f8f8f9, 0x08080202, 0xf7f7fdfe, 0x02020808, 0xfdfdf7f8, 0x0908fdfe, 0xf6f70202, 0xfdfe0909, + 0x0201f6f7, 0x0605f9fa, 0xf9fa0606, 0x0d0d0606, 0xf2f2f9fa, 0x06060d0d, 0xf9f9f2f3, 0x0d0d0d0d, + 0xf2f2f2f3, 0x0e0e0101, 0xf1f1feff, 0x01010e0e, 0xfefef1f2, 0x0c0bf7f8, 0xf3f40808, 0xf7f80c0c, + 0x0807f3f4, 0x17170e0e, 0xe8e8f1f2, 0x0e0e1717, 0xf1f1e8e9, 0x1211fafb, 0xedee0505, 0xfafb1212, + 0x0504edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1afeff, + 0xe4e50101, 0xfeff1b1b, 0x0100e4e5, 0x1110eeef, 0xeeef1111, 0x1716f2f3, 0xe8e90d0d, 0xf2f31717, + 0x0d0ce8e9, 0x28281010, 0xd7d7eff0, 0x10102828, 0xefefd7d8, 0x29291c1c, 0xd6d6e3e4, 0x1c1c2929, + 0xe3e3d6d7, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2b2b0606, 0xd4d4f9fa, 0x06062b2b, + 0xf9f9d4d5, 0x2e2e2e2e, 0xd1d1d1d2, 0x3231fbfc, 0xcdce0404, 0xfbfc3232, 0x0403cdce, 0x2221e4e5, + 0xddde1b1b, 0xe4e52222, 0x1b1addde, 0x2d2ce9ea, 0xd2d31616, 0xe9ea2d2d, 0x1615d2d3, 0x45452222, + 0xbabaddde, 0x22224545, 0xddddbabb, 0x46461313, 0xb9b9eced, 0x13134646, 0xececb9ba, 0x49493535, + 0xb6b6cacb, 0x35354949, 0xcacab6b7, 0x3e3deeef, 0xc1c21111, 0xeeef3e3e, 0x1110c1c2, 0x4d4d0505, + 0xb2b2fafb, 0x05054d4d, 0xfafab2b3, 0x52525252, 0xadadadae, 0x3332cccd, 0xcccd3333, 0x403fd4d5, + 0xbfc02b2b, 0xd4d54040, 0x2b2abfc0, 0x5a59f5f6, 0xa5a60a0a, 0xf5f65a5a, 0x0a09a5a6, 
0x72722929, + 0x8d8dd6d7, 0x29297272, 0xd6d68d8e, 0x74744040, 0x8b8bbfc0, 0x40407474, 0xbfbf8b8c, 0x5251dadb, + 0xadae2525, 0xdadb5252, 0x2524adae, 0x77771616, 0x8888e9ea, 0x16167777, 0xe9e98889, 0x7c7c5f5f, + 0x8383a0a1, 0x5f5f7c7c, 0xa0a08384, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5c5bb1b2, + 0xa3a44e4e, 0xb1b25c5c, 0x4e4da3a4, 0x7170bbbc, 0x8e8f4444, 0xbbbc7171, 0x44438e8f, 0x12121212, + 0xedededee, 0x22222222, 0xddddddde, 0x3f3f3f3f, 0xc0c0c0c1, 0x6d6d6d6d, 0x92929293, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, + 0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, + 0xfcfcfcfd, 0xfcfcfcfd, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, + 0x0403feff, 0x0403feff, 0x0403feff, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, + 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, + 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, + 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x07070707, 0x07070707, + 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, + 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, + 0xf5f5fcfd, 0x03030a0a, 0xfcfcf5f6, 0x09090909, 0xf6f6f6f7, 0x0706f8f9, 0xf8f90707, 0x0c0bfcfd, + 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x11110808, 0xeeeef7f8, 0x08081111, 0xf7f7eeef, 0x11111111, + 0xeeeeeeef, 0x13130101, 
0xececfeff, 0x01011313, 0xfefeeced, 0x100ff4f5, 0xeff00b0b, 0xf4f51010, + 0x0b0aeff0, 0x1716f9fa, 0xe8e90606, 0xf9fa1717, 0x0605e8e9, 0x1f1f1212, 0xe0e0edee, 0x12121f1f, + 0xedede0e1, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x21212121, 0xdedededf, 0x2423feff, + 0xdbdc0101, 0xfeff2424, 0x0100dbdc, 0x1716e8e9, 0xe8e91717, 0x1f1eeeef, 0xe0e11111, 0xeeef1f1f, + 0x1110e0e1, 0x36361515, 0xc9c9eaeb, 0x15153636, 0xeaeac9ca, 0x37372525, 0xc8c8dadb, 0x25253737, + 0xdadac8c9, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x39390808, 0xc6c6f7f8, 0x08083939, + 0xf7f7c6c7, 0x3d3d3d3d, 0xc2c2c2c3, 0x4241fafb, 0xbdbe0505, 0xfafb4242, 0x0504bdbe, 0x2d2cdbdc, + 0xd2d32424, 0xdbdc2d2d, 0x2423d2d3, 0x3c3be2e3, 0xc3c41d1d, 0xe2e33c3c, 0x1d1cc3c4, 0x5c5c2d2d, + 0xa3a3d2d3, 0x2d2d5c5c, 0xd2d2a3a4, 0x5d5d1919, 0xa2a2e6e7, 0x19195d5d, 0xe6e6a2a3, 0x61614747, + 0x9e9eb8b9, 0x47476161, 0xb8b89e9f, 0x5352e9ea, 0xacad1616, 0xe9ea5353, 0x1615acad, 0x66660707, + 0x9999f8f9, 0x07076666, 0xf8f8999a, 0x6d6d6d6d, 0x92929293, 0x4443bbbc, 0xbbbc4444, 0x5554c6c7, + 0xaaab3939, 0xc6c75555, 0x3938aaab, 0x7877f2f3, 0x87880d0d, 0xf2f37878, 0x0d0c8788, 0x6e6dcecf, + 0x91923131, 0xcecf6e6e, 0x31309192, 0x7b7a9798, 0x84856868, 0x97987b7b, 0x68678485, 0x18181818, + 0xe7e7e7e8, 0x2e2e2e2e, 0xd1d1d1d2, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, + 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, + 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0xfafb0101, 0xfafb0101, 0xfafb0101, + 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfeff0505, + 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 
0xfeff0505, 0xfeff0505, 0xfeff0505, + 0xfeff0505, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, + 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, + 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, + 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0x03030a0a, + 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, + 0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, + 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x03030d0d, 0xfcfcf2f3, 0x0908f6f7, 0xf6f70909, 0x0f0efbfc, + 0xf0f10404, 0xfbfc0f0f, 0x0403f0f1, 0x16160b0b, 0xe9e9f4f5, 0x0b0b1616, 0xf4f4e9ea, 0x15151515, + 0xeaeaeaeb, 0x18180202, 0xe7e7fdfe, 0x02021818, 0xfdfde7e8, 0x1413f1f2, 0xebec0e0e, 0xf1f21414, + 0x0e0debec, 0x26261717, 0xd9d9e8e9, 0x17172626, 0xe8e8d9da, 0x1d1cf7f8, 0xe2e30808, 0xf7f81d1d, + 0x0807e2e3, 0x27270b0b, 0xd8d8f4f5, 0x0b0b2727, 0xf4f4d8d9, 0x29292929, 0xd6d6d6d7, 0x2d2cfeff, + 0xd2d30101, 0xfeff2d2d, 0x0100d2d3, 0x1d1ce2e3, 0xe2e31d1d, 0x2726e9ea, 0xd8d91616, 0xe9ea2727, + 0x1615d8d9, 0x43431b1b, 0xbcbce4e5, 0x1b1b4343, 0xe4e4bcbd, 0x45452f2f, 0xbabad0d1, 0x2f2f4545, + 0xd0d0babb, 0x3837f0f1, 0xc7c80f0f, 0xf0f13838, 0x0f0ec7c8, 0x47470b0b, 0xb8b8f4f5, 0x0b0b4747, + 0xf4f4b8b9, 0x4c4c4c4c, 0xb3b3b3b4, 0x5352f9fa, 0xacad0606, 0xf9fa5353, 0x0605acad, 0x3938d2d3, + 0xc6c72d2d, 0xd2d33939, 0x2d2cc6c7, 0x4b4adbdc, 0xb4b52424, 0xdbdc4b4b, 0x2423b4b5, 0x73733838, + 0x8c8cc7c8, 0x38387373, 0xc7c78c8d, 0x75751f1f, 0x8a8ae0e1, 0x1f1f7575, 0xe0e08a8b, 0x7a7a5858, + 
0x8585a7a8, 0x58587a7a, 0xa7a78586, 0x6867e3e4, 0x97981c1c, 0xe3e46868, 0x1c1b9798, 0x5554aaab, + 0xaaab5555, 0x6a69b7b8, 0x95964848, 0xb7b86a6a, 0x48479596, 0x1e1e1e1e, 0xe1e1e1e2, 0x3a3a3a3a, + 0xc5c5c5c6, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505, + 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, + 0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, + 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, + 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0xf8f90202, + 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, + 0xf8f90202, 0xf8f90202, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, + 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, + 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, + 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, + 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, + 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0x0d0d0303, 0x0d0d0303, + 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, + 0x0d0d0303, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, + 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x06060606, 0xf9f9f9fa, 
0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, + 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0xfbfbf0f1, 0x0b0af4f5, 0xf4f50b0b, 0x1211fafb, + 0xedee0505, 0xfafb1212, 0x0504edee, 0x1a1a0d0d, 0xe5e5f2f3, 0x0d0d1a1a, 0xf2f2e5e6, 0x1a1a1a1a, + 0xe5e5e5e6, 0x1d1d0202, 0xe2e2fdfe, 0x02021d1d, 0xfdfde2e3, 0x1817eff0, 0xe7e81010, 0xeff01818, + 0x100fe7e8, 0x2e2e1c1c, 0xd1d1e3e4, 0x1c1c2e2e, 0xe3e3d1d2, 0x2322f6f7, 0xdcdd0909, 0xf6f72323, + 0x0908dcdd, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31313131, 0xcecececf, 0x3635feff, + 0xc9ca0101, 0xfeff3636, 0x0100c9ca, 0x2322dcdd, 0xdcdd2323, 0x2f2ee5e6, 0xd0d11a1a, 0xe5e62f2f, + 0x1a19d0d1, 0x51512020, 0xaeaedfe0, 0x20205151, 0xdfdfaeaf, 0x53533838, 0xacacc7c8, 0x38385353, + 0xc7c7acad, 0x4342edee, 0xbcbd1212, 0xedee4343, 0x1211bcbd, 0x56560d0d, 0xa9a9f2f3, 0x0d0d5656, + 0xf2f2a9aa, 0x5b5b5b5b, 0xa4a4a4a5, 0x6362f8f9, 0x9c9d0707, 0xf8f96363, 0x07069c9d, 0x4443c9ca, + 0xbbbc3636, 0xc9ca4444, 0x3635bbbc, 0x5a59d3d4, 0xa5a62c2c, 0xd3d45a5a, 0x2c2ba5a6, 0x7c7bdedf, + 0x83842121, 0xdedf7c7c, 0x21208384, 0x67669899, 0x98996767, 0x7f7ea9aa, 0x80815656, 0xa9aa7f7f, + 0x56558081, 0x25252525, 0xdadadadb, 0x45454545, 0xbabababb, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, + 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0xf7f80202, 0xf7f80202, 0xf7f80202, + 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, + 0xf7f80202, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 
0xfdfe0808, 0xfdfe0808, + 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, + 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, + 0x0201f7f8, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, + 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, + 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, + 0xf2f2f2f3, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, + 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, + 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, + 0xf0f0fbfc, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, + 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, + 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0xfafaedee, 0x0d0cf2f3, 0xf2f30d0d, 0x1514f9fa, + 0xeaeb0606, 0xf9fa1515, 0x0605eaeb, 0x1e1e0f0f, 0xe1e1f0f1, 0x0f0f1e1e, 0xf0f0e1e2, 0x1e1e1e1e, + 0xe1e1e1e2, 0x22220202, 0xddddfdfe, 0x02022222, 0xfdfdddde, 0x1c1beced, 0xe3e41313, 0xeced1c1c, + 0x1312e3e4, 0x36362020, 0xc9c9dfe0, 0x20203636, 0xdfdfc9ca, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, + 0x0b0ad6d7, 0x37370f0f, 0xc8c8f0f1, 0x0f0f3737, 0xf0f0c8c9, 0x39393939, 0xc6c6c6c7, 0x3f3efeff, + 0xc0c10101, 0xfeff3f3f, 0x0100c0c1, 0x2827d7d8, 0xd7d82828, 0x3736e1e2, 0xc8c91e1e, 0xe1e23737, + 0x1e1dc8c9, 0x5e5e2525, 0xa1a1dadb, 0x25255e5e, 0xdadaa1a2, 0x60604141, 0x9f9fbebf, 0x41416060, + 0xbebe9fa0, 0x4e4deaeb, 0xb1b21515, 0xeaeb4e4e, 0x1514b1b2, 0x64640f0f, 0x9b9bf0f1, 0x0f0f6464, + 0xf0f09b9c, 
0x6a6a6a6a, 0x95959596, 0x7473f7f8, 0x8b8c0808, 0xf7f87474, 0x08078b8c, 0x4f4ec0c1, + 0xb0b13f3f, 0xc0c14f4f, 0x3f3eb0b1, 0x6968cccd, 0x96973333, 0xcccd6969, 0x33329697, 0x78778788, + 0x87887878, 0x2b2b2b2b, 0xd4d4d4d5, 0x50505050, 0xafafafb0, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, + 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, + 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, + 0xf8f8f8f9, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, + 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0xf5f60303, 0xf5f60303, 0xf5f60303, + 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, + 0xf5f60303, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, + 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, + 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, + 0x0302f5f6, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, 0xefefeff0, 0xefefeff0, + 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, + 0xefefeff0, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, + 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0xededfafb, 0xededfafb, 0xededfafb, + 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, + 0xededfafb, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, + 0x05051212, 0x05051212, 0x05051212, 0x05051212, 
0x05051212, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, + 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0xfafaebec, 0x0f0ef0f1, 0xf0f10f0f, 0x1817f8f9, + 0xe7e80707, 0xf8f91818, 0x0706e7e8, 0x23231111, 0xdcdceeef, 0x11112323, 0xeeeedcdd, 0x22222222, + 0xddddddde, 0x26260303, 0xd9d9fcfd, 0x03032626, 0xfcfcd9da, 0x201fe9ea, 0xdfe01616, 0xe9ea2020, + 0x1615dfe0, 0x3d3d2525, 0xc2c2dadb, 0x25253d3d, 0xdadac2c3, 0x2f2ef2f3, 0xd0d10d0d, 0xf2f32f2f, + 0x0d0cd0d1, 0x3f3f1111, 0xc0c0eeef, 0x11113f3f, 0xeeeec0c1, 0x41414141, 0xbebebebf, 0x4847feff, + 0xb7b80101, 0xfeff4848, 0x0100b7b8, 0x2e2dd1d2, 0xd1d22e2e, 0x3f3edcdd, 0xc0c12323, 0xdcdd3f3f, + 0x2322c0c1, 0x6b6b2b2b, 0x9494d4d5, 0x2b2b6b6b, 0xd4d49495, 0x6e6e4b4b, 0x9191b4b5, 0x4b4b6e6e, + 0xb4b49192, 0x5958e7e8, 0xa6a71818, 0xe7e85959, 0x1817a6a7, 0x72721111, 0x8d8deeef, 0x11117272, + 0xeeee8d8e, 0x79797979, 0x86868687, 0x5b5ab7b8, 0xa4a54848, 0xb7b85b5b, 0x4847a4a5, 0x7877c5c6, + 0x87883a3a, 0xc5c67878, 0x3a398788, 0x31313131, 0xcecececf, 0x5c5c5c5c, 0xa3a3a3a4, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, + 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8, + 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, + 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, + 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0xf4f50303, + 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 
0xf4f50303, + 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, + 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0x0302f4f5, + 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, + 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, + 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, + 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, + 0xedededee, 0xedededee, 0xedededee, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, + 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0xebebfafb, + 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, + 0xebebfafb, 0xebebfafb, 0xebebfafb, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, + 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, + 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x1110eeef, 0xeeef1111, 0x1b1af8f9, + 0xe4e50707, 0xf8f91b1b, 0x0706e4e5, 0x27271313, 0xd8d8eced, 0x13132727, 0xececd8d9, 0x27272727, + 0xd8d8d8d9, 0x2b2b0303, 0xd4d4fcfd, 0x03032b2b, 0xfcfcd4d5, 0x2423e7e8, 0xdbdc1818, 0xe7e82424, + 0x1817dbdc, 0x45452a2a, 0xbabad5d6, 0x2a2a4545, 0xd5d5babb, 0x3534f1f2, 0xcacb0e0e, 0xf1f23535, + 0x0e0dcacb, 0x47471313, 0xb8b8eced, 0x13134747, 0xececb8b9, 0x49494949, 0xb6b6b6b7, 0x504ffdfe, + 0xafb00202, 0xfdfe5050, 
0x0201afb0, 0x3433cbcc, 0xcbcc3434, 0x4645d8d9, 0xb9ba2727, 0xd8d94646, + 0x2726b9ba, 0x79793030, 0x8686cfd0, 0x30307979, 0xcfcf8687, 0x7c7c5454, 0x8383abac, 0x54547c7c, + 0xabab8384, 0x6463e4e5, 0x9b9c1b1b, 0xe4e56464, 0x1b1a9b9c, 0x6665aeaf, 0x999a5151, 0xaeaf6666, + 0x5150999a, 0x37373737, 0xc8c8c8c9, 0x68686868, 0x97979798, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, + 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7, + 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, + 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, + 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, + 0x0c0bfcfd, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, + 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xfcfd0c0c, 0xfcfd0c0c, + 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, + 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, + 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, + 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0xe8e8f9fa, + 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 
0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, + 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0x06061717, 0x06061717, 0x06061717, 0x06061717, + 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, + 0x06061717, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, + 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, + 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe, + 0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x08080404, 0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, + 0xf7f7f7f8, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0807fbfc, 0xf7f80404, 0xfbfc0808, + 0x0403f7f8, 0x0e0e0808, 0xf1f1f7f8, 0x08080e0e, 0xf7f7f1f2, 0x0c0bfdfe, 0xf3f40202, 0xfdfe0c0c, + 0x0201f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000, + 0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff7f8, 0xeff00808, 0xf7f81010, + 0x0807eff0, 0x1a1a0a0a, 0xe5e5f5f6, 0x0a0a1a1a, 0xf5f5e5e6, 0x1c1c1212, 0xe3e3edee, 0x12121c1c, + 0xedede3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1c1c0404, 0xe3e3fbfc, 0x04041c1c, + 0xfbfbe3e4, 0x1e1e1e1e, 0xe1e1e1e2, 0x201ffdfe, 0xdfe00202, 0xfdfe2020, 0x0201dfe0, 0x1615edee, + 0xe9ea1212, 0xedee1616, 0x1211e9ea, 0x1e1df1f2, 0xe1e20e0e, 0xf1f21e1e, 0x0e0de1e2, 0x2e2e1616, + 0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2e2e0c0c, 0xd1d1f3f4, 0x0c0c2e2e, 0xf3f3d1d2, 0x30302222, + 0xcfcfddde, 0x22223030, 0xddddcfd0, 0x2827f5f6, 0xd7d80a0a, 0xf5f62828, 0x0a09d7d8, 0x32320404, + 0xcdcdfbfc, 0x04043232, 0xfbfbcdce, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e3e4, + 0xd5d61c1c, 0xe3e42a2a, 0x1c1bd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1a1a, + 
0xb3b3e5e6, 0x1a1a4c4c, 0xe5e5b3b4, 0x4c4c2a2a, 0xb3b3d5d6, 0x2a2a4c4c, 0xd5d5b3b4, 0x3635e7e8, + 0xc9ca1818, 0xe7e83636, 0x1817c9ca, 0x4e4e0e0e, 0xb1b1f1f2, 0x0e0e4e4e, 0xf1f1b1b2, 0x52523e3e, + 0xadadc1c2, 0x3e3e5252, 0xc1c1adae, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202, + 0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5c5c5c5c, 0xa3a3a3a4, 0x3c3bcbcc, 0xc3c43434, 0xcbcc3c3c, + 0x3433c3c4, 0x76763434, 0x8989cbcc, 0x34347676, 0xcbcb898a, 0x4a49d3d4, 0xb5b62c2c, 0xd3d44a4a, + 0x2c2bb5b6, 0x76764a4a, 0x8989b5b6, 0x4a4a7676, 0xb5b5898a, 0x76762020, 0x8989dfe0, 0x20207676, + 0xdfdf898a, 0x6665f3f4, 0x999a0c0c, 0xf3f46666, 0x0c0b999a, 0x605fd7d8, 0x9fa02828, 0xd7d86060, + 0x28279fa0, 0x7675ddde, 0x898a2222, 0xddde7676, 0x2221898a, 0x5857a7a8, 0xa7a85858, 0x6867b1b2, + 0x97984e4e, 0xb1b26868, 0x4e4d9798, 0x0c0c0c0c, 0xf3f3f3f4, 0x16161616, 0xe9e9e9ea, 0x2a2a2a2a, + 0xd5d5d5d6, 0x48484848, 0xb7b7b7b8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0xfdfe0000, + 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, + 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, + 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, + 0xf9f9f9fa, 0x09090303, 0xf6f6fcfd, 0x03030909, 0xfcfcf6f7, 0x0908fcfd, 0xf6f70303, 0xfcfd0909, + 0x0302f6f7, 0x0605f9fa, 0xf9fa0606, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0c0c0c, + 0xf3f3f3f4, 0x0f0f0000, 0xf0f10000, 
0x00000f0f, 0xfffff0f1, 0x0c0bf6f7, 0xf3f40909, 0xf6f70c0c, + 0x0908f3f4, 0x18180f0f, 0xe7e7f0f1, 0x0f0f1818, 0xf0f0e7e8, 0x1211f9fa, 0xedee0606, 0xf9fa1212, + 0x0605edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1b0000, + 0xe4e50000, 0x00001b1b, 0xffffe4e5, 0x1211edee, 0xedee1212, 0x1817f3f4, 0xe7e80c0c, 0xf3f41818, + 0x0c0be7e8, 0x27270f0f, 0xd8d8f0f1, 0x0f0f2727, 0xf0f0d8d9, 0x2a2a1b1b, 0xd5d5e4e5, 0x1b1b2a2a, + 0xe4e4d5d6, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2a2a0606, 0xd5d5f9fa, 0x06062a2a, + 0xf9f9d5d6, 0x2d2d2d2d, 0xd2d2d2d3, 0x3332fcfd, 0xcccd0303, 0xfcfd3333, 0x0302cccd, 0x2120e4e5, + 0xdedf1b1b, 0xe4e52121, 0x1b1adedf, 0x2d2ceaeb, 0xd2d31515, 0xeaeb2d2d, 0x1514d2d3, 0x45452121, + 0xbabadedf, 0x21214545, 0xdedebabb, 0x45451212, 0xbabaedee, 0x12124545, 0xededbabb, 0x48483636, + 0xb7b7c9ca, 0x36364848, 0xc9c9b7b8, 0x3f3eedee, 0xc0c11212, 0xedee3f3f, 0x1211c0c1, 0x4e4e0606, + 0xb1b1f9fa, 0x06064e4e, 0xf9f9b1b2, 0x51515151, 0xaeaeaeaf, 0x3332cccd, 0xcccd3333, 0x3f3ed5d6, + 0xc0c12a2a, 0xd5d63f3f, 0x2a29c0c1, 0x5a59f6f7, 0xa5a60909, 0xf6f75a5a, 0x0908a5a6, 0x72722a2a, + 0x8d8dd5d6, 0x2a2a7272, 0xd5d58d8e, 0x75753f3f, 0x8a8ac0c1, 0x3f3f7575, 0xc0c08a8b, 0x5150dbdc, + 0xaeaf2424, 0xdbdc5151, 0x2423aeaf, 0x78781515, 0x8787eaeb, 0x15157878, 0xeaea8788, 0x7b7b6060, + 0x84849fa0, 0x60607b7b, 0x9f9f8485, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5d5cb1b2, + 0xa2a34e4e, 0xb1b25d5d, 0x4e4da2a3, 0x7271babb, 0x8d8e4545, 0xbabb7272, 0x45448d8e, 0x12121212, + 0xedededee, 0x21212121, 0xdedededf, 0x3f3f3f3f, 0xc0c0c0c1, 0x6c6c6c6c, 0x93939394, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, + 0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, + 0xfcfcfcfd, 0xfcfcfcfd, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 
0x03030000, 0x03030000, + 0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, + 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, 0x00000303, 0x00000303, 0x00000303, + 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, + 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, + 0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, 0xf7f7f7f8, 0x0807f7f8, 0xf7f80808, 0x0c0bfbfc, + 0xf3f40404, 0xfbfc0c0c, 0x0403f3f4, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x10101010, + 0xefefeff0, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x100ff3f4, 0xeff00c0c, 0xf3f41010, + 0x0c0beff0, 0x1817fbfc, 0xe7e80404, 0xfbfc1818, 0x0403e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, + 0xefefdfe0, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x20202020, 0xdfdfdfe0, 0x24240000, + 0xdbdc0000, 0x00002424, 0xffffdbdc, 0x1817e7e8, 0xe7e81818, 0x201feff0, 0xdfe01010, 0xeff02020, + 0x100fdfe0, 0x34341414, 0xcbcbebec, 0x14143434, 0xebebcbcc, 0x38382424, 0xc7c7dbdc, 0x24243838, + 0xdbdbc7c8, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x38380808, 0xc7c7f7f8, 0x08083838, + 0xf7f7c7c8, 0x3c3c3c3c, 0xc3c3c3c4, 0x403ffbfc, 0xbfc00404, 0xfbfc4040, 0x0403bfc0, 0x2c2bdbdc, + 0xd3d42424, 0xdbdc2c2c, 0x2423d3d4, 0x3c3be3e4, 0xc3c41c1c, 0xe3e43c3c, 0x1c1bc3c4, 0x5c5c2c2c, + 0xa3a3d3d4, 0x2c2c5c5c, 0xd3d3a3a4, 0x5c5c1818, 0xa3a3e7e8, 0x18185c5c, 0xe7e7a3a4, 0x60604848, + 0x9f9fb7b8, 
0x48486060, 0xb7b79fa0, 0x5453ebec, 0xabac1414, 0xebec5454, 0x1413abac, 0x64640808, + 0x9b9bf7f8, 0x08086464, 0xf7f79b9c, 0x6c6c6c6c, 0x93939394, 0x4443bbbc, 0xbbbc4444, 0x5453c7c8, + 0xabac3838, 0xc7c85454, 0x3837abac, 0x7877f3f4, 0x87880c0c, 0xf3f47878, 0x0c0b8788, 0x6c6bcfd0, + 0x93943030, 0xcfd06c6c, 0x302f9394, 0x7c7b9798, 0x83846868, 0x97987c7c, 0x68678384, 0x18181818, + 0xe7e7e7e8, 0x2c2c2c2c, 0xd3d3d3d4, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, + 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, + 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, + 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, + 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, + 0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, + 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, + 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, + 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0x04040808, + 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, + 0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 
0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, + 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x05050f0f, 0xfafaf0f1, 0x0a09f5f6, 0xf5f60a0a, 0x0f0efafb, + 0xf0f10505, 0xfafb0f0f, 0x0504f0f1, 0x14140a0a, 0xebebf5f6, 0x0a0a1414, 0xf5f5ebec, 0x14141414, + 0xebebebec, 0x19190000, 0xe6e70000, 0x00001919, 0xffffe6e7, 0x1413f0f1, 0xebec0f0f, 0xf0f11414, + 0x0f0eebec, 0x28281919, 0xd7d7e6e7, 0x19192828, 0xe6e6d7d8, 0x1e1df5f6, 0xe1e20a0a, 0xf5f61e1e, + 0x0a09e1e2, 0x28280a0a, 0xd7d7f5f6, 0x0a0a2828, 0xf5f5d7d8, 0x28282828, 0xd7d7d7d8, 0x2d2d0000, + 0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x1e1de1e2, 0xe1e21e1e, 0x2827ebec, 0xd7d81414, 0xebec2828, + 0x1413d7d8, 0x41411919, 0xbebee6e7, 0x19194141, 0xe6e6bebf, 0x46462d2d, 0xb9b9d2d3, 0x2d2d4646, + 0xd2d2b9ba, 0x3736f0f1, 0xc8c90f0f, 0xf0f13737, 0x0f0ec8c9, 0x46460a0a, 0xb9b9f5f6, 0x0a0a4646, + 0xf5f5b9ba, 0x4b4b4b4b, 0xb4b4b4b5, 0x5554fafb, 0xaaab0505, 0xfafb5555, 0x0504aaab, 0x3736d2d3, + 0xc8c92d2d, 0xd2d33737, 0x2d2cc8c9, 0x4b4adcdd, 0xb4b52323, 0xdcdd4b4b, 0x2322b4b5, 0x73733737, + 0x8c8cc8c9, 0x37377373, 0xc8c88c8d, 0x73731e1e, 0x8c8ce1e2, 0x1e1e7373, 0xe1e18c8d, 0x78785a5a, + 0x8787a5a6, 0x5a5a7878, 0xa5a58788, 0x6968e1e2, 0x96971e1e, 0xe1e26969, 0x1e1d9697, 0x5554aaab, + 0xaaab5555, 0x6968b9ba, 0x96974646, 0xb9ba6969, 0x46459697, 0x1e1e1e1e, 0xe1e1e1e2, 0x3c3c3c3c, + 0xc3c3c3c4, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505, + 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, + 0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, + 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x05050000, 0x05050000, 0x05050000, 0x05050000, + 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0xfafb0000, + 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 
0xfafb0000, + 0xfafb0000, 0xfafb0000, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, + 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0xfffffafb, 0xfffffafb, 0xfffffafb, + 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, + 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, + 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, + 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0f0f0505, 0x0f0f0505, + 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, + 0x0f0f0505, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, + 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, + 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0bf3f4, 0xf3f40c0c, 0x1211f9fa, + 0xedee0606, 0xf9fa1212, 0x0605edee, 0x18180c0c, 0xe7e7f3f4, 0x0c0c1818, 0xf3f3e7e8, 0x18181818, + 0xe7e7e7e8, 0x1e1e0000, 0xe1e20000, 0x00001e1e, 0xffffe1e2, 0x1817edee, 0xe7e81212, 0xedee1818, + 0x1211e7e8, 0x30301e1e, 0xcfcfe1e2, 0x1e1e3030, 0xe1e1cfd0, 0x2423f9fa, 0xdbdc0606, 0xf9fa2424, + 0x0605dbdc, 0x30300c0c, 0xcfcff3f4, 0x0c0c3030, 0xf3f3cfd0, 0x30303030, 0xcfcfcfd0, 0x36360000, + 0xc9ca0000, 0x00003636, 0xffffc9ca, 0x2423dbdc, 0xdbdc2424, 0x302fe7e8, 0xcfd01818, 0xe7e83030, + 0x1817cfd0, 0x4e4e1e1e, 0xb1b1e1e2, 0x1e1e4e4e, 0xe1e1b1b2, 0x54543636, 0xababc9ca, 0x36365454, + 0xc9c9abac, 0x4241edee, 0xbdbe1212, 0xedee4242, 0x1211bdbe, 0x54540c0c, 0xababf3f4, 0x0c0c5454, + 0xf3f3abac, 0x5a5a5a5a, 
0xa5a5a5a6, 0x605ff9fa, 0x9fa00606, 0xf9fa6060, 0x06059fa0, 0x4241c9ca, + 0xbdbe3636, 0xc9ca4242, 0x3635bdbe, 0x5a59d5d6, 0xa5a62a2a, 0xd5d65a5a, 0x2a29a5a6, 0x7e7de1e2, + 0x81821e1e, 0xe1e27e7e, 0x1e1d8182, 0x6665999a, 0x999a6666, 0x7e7dabac, 0x81825454, 0xabac7e7e, + 0x54538182, 0x24242424, 0xdbdbdbdc, 0x42424242, 0xbdbdbdbe, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, + 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, + 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, + 0xf9fa0000, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, + 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, + 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, + 0xfffff9fa, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, + 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, + 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, + 0xf3f3f9fa, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 
0x06060c0c, 0x06060c0c, 0x06060c0c, + 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, + 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0xf8f8eaeb, 0x0e0df1f2, 0xf1f20e0e, 0x1514f8f9, + 0xeaeb0707, 0xf8f91515, 0x0706eaeb, 0x1c1c0e0e, 0xe3e3f1f2, 0x0e0e1c1c, 0xf1f1e3e4, 0x1c1c1c1c, + 0xe3e3e3e4, 0x23230000, 0xdcdd0000, 0x00002323, 0xffffdcdd, 0x1c1beaeb, 0xe3e41515, 0xeaeb1c1c, + 0x1514e3e4, 0x38382323, 0xc7c7dcdd, 0x23233838, 0xdcdcc7c8, 0x2a29f1f2, 0xd5d60e0e, 0xf1f22a2a, + 0x0e0dd5d6, 0x38380e0e, 0xc7c7f1f2, 0x0e0e3838, 0xf1f1c7c8, 0x38383838, 0xc7c7c7c8, 0x3f3f0000, + 0xc0c10000, 0x00003f3f, 0xffffc0c1, 0x2a29d5d6, 0xd5d62a2a, 0x3837e3e4, 0xc7c81c1c, 0xe3e43838, + 0x1c1bc7c8, 0x5b5b2323, 0xa4a4dcdd, 0x23235b5b, 0xdcdca4a5, 0x62623f3f, 0x9d9dc0c1, 0x3f3f6262, + 0xc0c09d9e, 0x4d4ceaeb, 0xb2b31515, 0xeaeb4d4d, 0x1514b2b3, 0x62620e0e, 0x9d9df1f2, 0x0e0e6262, + 0xf1f19d9e, 0x69696969, 0x96969697, 0x7776f8f9, 0x88890707, 0xf8f97777, 0x07068889, 0x4d4cc0c1, + 0xb2b33f3f, 0xc0c14d4d, 0x3f3eb2b3, 0x6968cecf, 0x96973131, 0xcecf6969, 0x31309697, 0x77768889, + 0x88897777, 0x2a2a2a2a, 0xd5d5d5d6, 0x4d4d4d4d, 0xb2b2b2b3, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, + 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, + 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, + 0xf8f8f8f9, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, + 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, 0xf8f90000, + 
0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, + 0xf8f90000, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, + 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, + 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, + 0xfffff8f9, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, + 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, + 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, + 0xf1f1f1f2, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, + 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, + 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, + 0xeaeaf8f9, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, + 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, + 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x100feff0, 0xeff01010, 0x1817f7f8, + 0xe7e80808, 0xf7f81818, 0x0807e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, 0xefefdfe0, 0x20202020, + 0xdfdfdfe0, 0x28280000, 0xd7d80000, 0x00002828, 0xffffd7d8, 0x201fe7e8, 0xdfe01818, 0xe7e82020, + 0x1817dfe0, 0x40402828, 0xbfbfd7d8, 0x28284040, 0xd7d7bfc0, 0x302feff0, 0xcfd01010, 0xeff03030, + 0x100fcfd0, 0x40401010, 0xbfbfeff0, 0x10104040, 0xefefbfc0, 0x40404040, 0xbfbfbfc0, 0x48480000, + 0xb7b80000, 0x00004848, 0xffffb7b8, 
0x302fcfd0, 0xcfd03030, 0x403fdfe0, 0xbfc02020, 0xdfe04040, + 0x201fbfc0, 0x68682828, 0x9797d7d8, 0x28286868, 0xd7d79798, 0x70704848, 0x8f8fb7b8, 0x48487070, + 0xb7b78f90, 0x5857e7e8, 0xa7a81818, 0xe7e85858, 0x1817a7a8, 0x70701010, 0x8f8feff0, 0x10107070, + 0xefef8f90, 0x78787878, 0x87878788, 0x5857b7b8, 0xa7a84848, 0xb7b85858, 0x4847a7a8, 0x7877c7c8, + 0x87883838, 0xc7c87878, 0x38378788, 0x30303030, 0xcfcfcfd0, 0x58585858, 0xa7a7a7a8, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, + 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8, + 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, + 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, + 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0xf7f80000, + 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, + 0xf7f80000, 0xf7f80000, 0xf7f80000, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, + 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0xfffff7f8, + 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, + 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, + 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, + 0xefefeff0, 0xefefeff0, 0xefefeff0, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, + 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0xefeff7f8, + 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 
0xefeff7f8, 0xefeff7f8, + 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, + 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, + 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x1211edee, 0xedee1212, 0x1b1af6f7, + 0xe4e50909, 0xf6f71b1b, 0x0908e4e5, 0x24241212, 0xdbdbedee, 0x12122424, 0xededdbdc, 0x24242424, + 0xdbdbdbdc, 0x2d2d0000, 0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x2423e4e5, 0xdbdc1b1b, 0xe4e52424, + 0x1b1adbdc, 0x48482d2d, 0xb7b7d2d3, 0x2d2d4848, 0xd2d2b7b8, 0x3635edee, 0xc9ca1212, 0xedee3636, + 0x1211c9ca, 0x48481212, 0xb7b7edee, 0x12124848, 0xededb7b8, 0x48484848, 0xb7b7b7b8, 0x51510000, + 0xaeaf0000, 0x00005151, 0xffffaeaf, 0x3635c9ca, 0xc9ca3636, 0x4847dbdc, 0xb7b82424, 0xdbdc4848, + 0x2423b7b8, 0x75752d2d, 0x8a8ad2d3, 0x2d2d7575, 0xd2d28a8b, 0x7e7e5151, 0x8181aeaf, 0x51517e7e, + 0xaeae8182, 0x6362e4e5, 0x9c9d1b1b, 0xe4e56363, 0x1b1a9c9d, 0x6362aeaf, 0x9c9d5151, 0xaeaf6363, + 0x51509c9d, 0x36363636, 0xc9c9c9ca, 0x6c6c6c6c, 0x93939394, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, + 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7, + 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, + 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x09090000, 0x09090000, 0x09090000, 0x09090000, + 0x09090000, 
0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, + 0x09090000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, + 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0x00000909, 0x00000909, + 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, + 0x00000909, 0x00000909, 0x00000909, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, + 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, + 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, + 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, 0xedededee, 0xedededee, + 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, + 0xedededee, 0xedededee, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, + 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0xe4e4f6f7, + 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, + 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, + 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, + 0x09091b1b, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, + 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, + 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0504fafb, 0xfafb0505, 0xfafb0505, + 0x0504fafb, 0x0b0b0606, 0xf4f4f9fa, 0x06060b0b, 0xf9f9f4f5, 0x08080000, 0xf7f80000, 0x00000808, + 0xfffff7f8, 0x0b0b0b0b, 0xf4f4f4f5, 0x0c0c0000, 
0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x11110c0c, + 0xeeeef3f4, 0x0c0c1111, 0xf3f3eeef, 0x11111111, 0xeeeeeeef, 0x12120606, 0xededf9fa, 0x06061212, + 0xf9f9edee, 0x0b0af7f8, 0xf4f50808, 0xf7f80b0b, 0x0807f4f5, 0x0f0f0000, 0xf0f10000, 0x00000f0f, + 0xfffff0f1, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x19191212, 0xe6e6edee, 0x12121919, + 0xedede6e7, 0x19190b0b, 0xe6e6f4f5, 0x0b0b1919, 0xf4f4e6e7, 0x19191919, 0xe6e6e6e7, 0x0e0df1f2, + 0xf1f20e0e, 0xf1f20e0e, 0x0e0df1f2, 0x1a1a0000, 0xe5e60000, 0x00001a1a, 0xffffe5e6, 0x1211f4f5, + 0xedee0b0b, 0xf4f51212, 0x0b0aedee, 0x1615f8f9, 0xe9ea0707, 0xf8f91616, 0x0706e9ea, 0x22221a1a, + 0xdddde5e6, 0x1a1a2222, 0xe5e5ddde, 0x22221212, 0xddddedee, 0x12122222, 0xededddde, 0x22222222, + 0xddddddde, 0x23230b0b, 0xdcdcf4f5, 0x0b0b2323, 0xf4f4dcdd, 0x1d1d0000, 0xe2e30000, 0x00001d1d, + 0xffffe2e3, 0x1615eced, 0xe9ea1313, 0xeced1616, 0x1312e9ea, 0x1a19f0f1, 0xe5e60f0f, 0xf0f11a1a, + 0x0f0ee5e6, 0x25250000, 0xdadb0000, 0x00002525, 0xffffdadb, 0x2c2c1b1b, 0xd3d3e4e5, 0x1b1b2c2c, + 0xe4e4d3d4, 0x2c2c2424, 0xd3d3dbdc, 0x24242c2c, 0xdbdbd3d4, 0x2c2c1212, 0xd3d3edee, 0x12122c2c, + 0xededd3d4, 0x2120f5f6, 0xdedf0a0a, 0xf5f62121, 0x0a09dedf, 0x2d2d2d2d, 0xd2d2d2d3, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, + 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, + 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, + 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, + 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, + 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
0xf9f9f9fa, + 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, + 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, + 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, + 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, + 0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0403fbfc, 0xfbfc0404, 0xf9fa0a0a, + 0x0605f5f6, 0xf3f40000, 0x0c0c0000, 0xf3f3f9fa, 0xf3f40606, 0x0c0bf9fa, 0x0c0c0606, 0xfffff1f2, + 0x00000e0e, 0x0c0c0c0c, 0xf3f3f3f4, 0xedee0000, 0x12120000, 0xf3f40e0e, 0x0c0bf1f2, 0xf9f9edee, + 0xf9fa1212, 0x0605edee, 0x06061212, 0xededf5f6, 0xedee0a0a, 0x1211f5f6, 0x12120a0a, 0xffffe9ea, + 0x00001616, 0xe7e80000, 0x18180000, 0xf3f3e9ea, 0xf3f41616, 0x0c0be9ea, 0x0c0c1616, 0xe7e7f7f8, + 0xe7e80808, 0x1817f7f8, 0x18180808, 0xf9f9e5e6, 0xf9fa1a1a, 0x0605e5e6, 0x06061a1a, 0xffffe3e4, + 0x00001c1c, 0x14141414, 0xebebebec, 0xe5e5f1f2, 0x1a1a0e0e, 0xf3f3e1e2, 0x0c0c1e1e, 0xdfdff5f6, + 0x20200a0a, 0xdfdfedee, 0x20201212, 0xe5e5e5e6, 0x1a1a1a1a, 0xebebddde, 0x14142222, 0xf3f3d9da, + 0x0c0c2626, 0xdfdfdfe0, 0x20202020, 0x20202020, 0xd7d7e9ea, 0xddddddde, 0x22222222, 0x00000000, + 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, + 0xfffffdfe, 0x02020202, 
0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, + 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, + 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, + 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, + 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, + 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, + 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, + 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, + 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, + 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, + 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, + 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0605f9fa, 0xf9fa0606, 0xf7f80e0e, + 0x0807f1f2, 0xffffedee, 0x00001212, 0xeff00a0a, 0x100ff5f6, 0xe7e80000, 0x18180000, 0xf7f7e7e8, + 0xf7f81818, 0x0807e7e8, 0x08081818, 0x12121212, 0xedededee, 0xeff01414, 0x100febec, 0xe5e5f1f2, + 0xe5e60e0e, 0x1a19f1f2, 0x1a1a0e0e, 0xffffe1e2, 0x00001e1e, 0xddde0000, 0x22220000, 0xf7f7ddde, + 0xf7f82222, 0x0807ddde, 0x08082222, 0xedede1e2, 0xedee1e1e, 0x1211e1e2, 0x12121e1e, 0xddddf5f6, + 0xddde0a0a, 0x2221f5f6, 0x22220a0a, 0xddddebec, 0x22221414, 0xffffd7d8, 0x00002828, 0x1e1e1e1e, + 0xe1e1e1e2, 0xededd7d8, 0x12122828, 0xd3d40000, 0x2c2c0000, 0xd3d3eff0, 0x2c2c1010, 0xdbdbdbdc, + 0xdbdbdbdc, 0x24242424, 0xd3d3e5e6, 0x2c2c1a1a, 0xe5e5d1d2, 0x1a1a2e2e, 0xededcbcc, 0x12123434, + 0xc9c9ebec, 0xd3d3d3d4, 0x2c2c2c2c, 0xc9c9dfe0, 0xd1d1d1d2, 0xd1d1d1d2, 0x2e2e2e2e, 0x00000000, + 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, + 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, + 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, + 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, + 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, + 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, + 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, + 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, + 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, + 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, + 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, + 
0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, + 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, + 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, + 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, + 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, + 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, + 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, + 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, + 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, + 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, + 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x0807f7f8, 0xf7f80808, 0xeff00808, + 0x100ff7f8, 0xe7e80000, 0x18180000, 0xf7f7e7e8, 0xf7f81818, 0x0807e7e8, 0x08081818, 0xeff01414, + 0x100febec, 0xffffe3e4, 0x00001c1c, 0xe7e7eff0, 0xe7e81010, 0x1817eff0, 0x18181010, 0xdfe00000, + 0x20200000, 0xefefe3e4, 0xeff01c1c, 0x100fe3e4, 0x10101c1c, 0xdfdff7f8, 0xdfe00808, 0xf7f7dfe0, + 0xf7f82020, 0x0807dfe0, 0x08082020, 0x201ff7f8, 0x20200808, 0x18181818, 0xe7e7e7e8, 0xe7e81818, + 0x1817e7e8, 0xdfdfebec, 0x20201414, 0xffffd7d8, 0x00002828, 0xefefd7d8, 0x10102828, 0xd3d40000, + 0xd3d40000, 0xffffd3d4, 0x00002c2c, 0x2c2c0000, 0x2c2c0000, 0xdfdfdfe0, 0x20202020, 0xd3d3eff0, + 0x2c2c1010, 0xd3d3e7e8, 0xe7e7d3d4, 0x18182c2c, 0x2c2c1818, 0xefefcfd0, 0x10103030, 0xdbdbdbdc, + 0xdbdbdbdc, 0x24242424, 0x24242424, 
0xcbcbebec, 0x28282828, 0xd7d7d7d8, 0xcbcbdfe0, 0x00000000, + 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, + 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, + 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, + 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, + 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, + 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, + 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, + 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, + 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, + 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, + 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, + 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, + 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, + 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, + 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, + 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, + 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, + 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 
0x00000404, 0xfffffbfc, + 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 
0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, + 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, + 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, + 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, + 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 
0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 
0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, + 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, + 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, + 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, + 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, + 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, + 
0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, + 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, + 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, + 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, + 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; + + +static const uint32_t correctionhighorder[] = { + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, + 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, + 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 
0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, + 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, + 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, + 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, + 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, + 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, + 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, + 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, + 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, + 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, + 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, + 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, + 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, + 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, + 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, + 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, + 0x03030a0a, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, + 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, + 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, + 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 
0xf8f90202, 0xfdfe0707, 0x0201f8f9, + 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, + 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, + 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, + 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, + 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, + 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, + 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, + 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, + 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, + 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, + 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, + 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 
0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, + 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, + 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 
0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, + 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, + 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, + 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, + 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7, + 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, + 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 
0xfcfd0c0c, + 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, + 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, + 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, + 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, + 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, + 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, + 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, + 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, + 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, + 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, + 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, + 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, + 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, + 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, + 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, + 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, + 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, + 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, + 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, + 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, + 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, + 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, + 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, + 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, + 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, + 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, + 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, + 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, + 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, + 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, + 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, + 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, + 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, + 0x03030303, 0xfcfcfcfd, 0x03030000, 
0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 
0x04040808, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, + 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, + 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, + 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, + 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, + 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, + 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, + 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, + 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, + 0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, + 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, + 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, + 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, + 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, + 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, + 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, + 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, + 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, + 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, + 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, + 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, + 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 
0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, + 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, + 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, + 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 
0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, + 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, + 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, + 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 
0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, + 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, + 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, + 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 
0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, + 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, + 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7, + 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, + 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, + 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, + 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, + 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, + 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, + 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, + 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, + 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, + 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, + 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 
0x09091b1b, 0xf6f6e4e5, + 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, + 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, + 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, + 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, + 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, + 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, + 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, + 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, + 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, + 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, + 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, + 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, + 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, + 0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, + 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, + 0x07070000, 0x07070000, 0x07070000, 0x07070000, 
0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, + 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, + 0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, + 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, + 0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, + 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, + 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, + 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, + 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
0xfffffdfe, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, + 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, + 0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, + 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0x00000606, 0x00000606, + 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, + 0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, + 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, + 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, + 0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, + 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, + 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, + 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, + 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, + 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, + 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, + 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, + 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, + 0x0a0a0000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 
0xf5f60000, 0xf5f60000, 0xf5f60000, + 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0x00000a0a, 0x00000a0a, + 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, + 0x00000a0a, 0x00000a0a, 0x00000a0a, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, + 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, + 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, + 0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, + 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, 0x00000404, + 
0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, + 0x00000404, 0x00000404, 0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, + 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, + 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, + 0xfbfbfbfc, 0xfbfbfbfc, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, + 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, + 0x0c0c0000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, + 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0x00000c0c, 0x00000c0c, + 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, + 0x00000c0c, 0x00000c0c, 0x00000c0c, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, + 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, + 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 
0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, + 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, + 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, + 0xdeadbeef, 0xdeadbeef, 
0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, + 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, + 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, + 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, + 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, + 0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, + 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, + 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, + 0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, + 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, + 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 
0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 +}; diff --git a/mpeg4/src/libavcodec/internal.h b/mpeg4/src/libavcodec/internal.h new file mode 100644 index 0000000000000000000000000000000000000000..189090eaada4e537eb77d0d84810d852d72b5b94 --- /dev/null +++ b/mpeg4/src/libavcodec/internal.h @@ -0,0 +1,12 @@ +#ifndef INTERNAL_H +#define INTERNAL_H + +/** + * @file internal.h + * common functions for internal libavcodec use + */ + + +int av_tempfile(char *prefix, char **filename); + +#endif /* INTERNAL_H */ diff --git a/mpeg4/src/libavcodec/interplayvideo.c b/mpeg4/src/libavcodec/interplayvideo.c new file mode 100644 index 0000000000000000000000000000000000000000..73165e7950357d382f5a623e1bd4e75dfa59443d --- /dev/null +++ b/mpeg4/src/libavcodec/interplayvideo.c @@ -0,0 +1,962 @@ +/* + * Interplay MVE Video Decoder + * Copyright (C) 2003 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file interplayvideo.c + * Interplay MVE Video Decoder by Mike Melanson (melanson@pcisys.net) + * For more information about the Interplay MVE format, visit: + * http://www.pcisys.net/~melanson/codecs/interplay-mve.txt + * This code is written in such a way that the identifiers match up + * with the encoding descriptions in the document. + * + * This decoder presently only supports a PAL8 output colorspace. + * + * An Interplay video frame consists of 2 parts: The decoding map and + * the video data. A demuxer must load these 2 parts together in a single + * buffer before sending it through the stream to this decoder. + */ + +#include +#include +#include +#include + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" + +#define PALETTE_COUNT 256 + +/* debugging support */ +#define DEBUG_INTERPLAY 0 +#if DEBUG_INTERPLAY +#define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__) +#else +static inline void debug_interplay(const char *format, ...) 
{ } +#endif + +typedef struct IpvideoContext { + + AVCodecContext *avctx; + DSPContext dsp; + AVFrame second_last_frame; + AVFrame last_frame; + AVFrame current_frame; + unsigned char *decoding_map; + int decoding_map_size; + + unsigned char *buf; + int size; + + unsigned char *stream_ptr; + unsigned char *stream_end; + unsigned char *pixel_ptr; + int line_inc; + int stride; + int upper_motion_limit_offset; + +} IpvideoContext; + +#define CHECK_STREAM_PTR(n) \ + if ((s->stream_ptr + n) > s->stream_end) { \ + av_log(s->avctx, AV_LOG_ERROR, "Interplay video warning: stream_ptr out of bounds (%p >= %p)\n", \ + s->stream_ptr + n, s->stream_end); \ + return -1; \ + } + +#define COPY_FROM_CURRENT() \ + motion_offset = current_offset; \ + motion_offset += y * s->stride; \ + motion_offset += x; \ + if (motion_offset < 0) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \ + return -1; \ + } else if (motion_offset > s->upper_motion_limit_offset) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \ + motion_offset, s->upper_motion_limit_offset); \ + return -1; \ + } \ + s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \ + s->current_frame.data[0] + motion_offset, s->stride, 8); + +#define COPY_FROM_PREVIOUS() \ + motion_offset = current_offset; \ + motion_offset += y * s->stride; \ + motion_offset += x; \ + if (motion_offset < 0) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \ + return -1; \ + } else if (motion_offset > s->upper_motion_limit_offset) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \ + motion_offset, s->upper_motion_limit_offset); \ + return -1; \ + } \ + s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \ + s->last_frame.data[0] + motion_offset, s->stride, 8); + +#define COPY_FROM_SECOND_LAST() \ + motion_offset = current_offset; \ + motion_offset += y * s->stride; \ + 
motion_offset += x; \ + if (motion_offset < 0) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset < 0 (%d)\n", motion_offset); \ + return -1; \ + } else if (motion_offset > s->upper_motion_limit_offset) { \ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: motion offset above limit (%d >= %d)\n", \ + motion_offset, s->upper_motion_limit_offset); \ + return -1; \ + } \ + s->dsp.put_pixels_tab[0][0](s->pixel_ptr, \ + s->second_last_frame.data[0] + motion_offset, s->stride, 8); + +static int ipvideo_decode_block_opcode_0x0(IpvideoContext *s) +{ + int x, y; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy a block from the previous frame */ + x = y = 0; + COPY_FROM_PREVIOUS(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x1(IpvideoContext *s) +{ + int x, y; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy block from 2 frames ago */ + x = y = 0; + COPY_FROM_SECOND_LAST(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x2(IpvideoContext *s) +{ + unsigned char B; + int x, y; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy block from 2 frames ago using a motion vector; need 1 more byte */ + CHECK_STREAM_PTR(1); + B = *s->stream_ptr++; + + if (B < 56) { + x = 8 + (B % 7); + y = B / 7; + } else { + x = -14 + ((B - 56) % 29); + y = 8 + ((B - 56) / 29); + } + + debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + COPY_FROM_SECOND_LAST(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x3(IpvideoContext *s) +{ + unsigned char B; + int x, y; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy 8x8 block from current frame from an up/left block */ + + /* need 1 more byte for motion */ + CHECK_STREAM_PTR(1); + B = *s->stream_ptr++; + + if (B < 56) { + 
x = -(8 + (B % 7)); + y = -(B / 7); + } else { + x = -(-14 + ((B - 56) % 29)); + y = -( 8 + ((B - 56) / 29)); + } + + debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + COPY_FROM_CURRENT(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x4(IpvideoContext *s) +{ + int x, y; + unsigned char B, BL, BH; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy a block from the previous frame; need 1 more byte */ + CHECK_STREAM_PTR(1); + + B = *s->stream_ptr++; + BL = B & 0x0F; + BH = (B >> 4) & 0x0F; + x = -8 + BL; + y = -8 + BH; + + debug_interplay (" motion byte = %d, (x, y) = (%d, %d)\n", B, x, y); + COPY_FROM_PREVIOUS(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x5(IpvideoContext *s) +{ + signed char x, y; + int motion_offset; + int current_offset = s->pixel_ptr - s->current_frame.data[0]; + + /* copy a block from the previous frame using an expanded range; + * need 2 more bytes */ + CHECK_STREAM_PTR(2); + + x = *s->stream_ptr++; + y = *s->stream_ptr++; + + debug_interplay (" motion bytes = %d, %d\n", x, y); + COPY_FROM_PREVIOUS(); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x6(IpvideoContext *s) +{ + /* mystery opcode? skip multiple blocks? */ + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: Help! 
Mystery opcode 0x6 seen\n"); + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x7(IpvideoContext *s) +{ + int x, y; + unsigned char P0, P1; + unsigned char B[8]; + unsigned int flags; + int bitmask; + + /* 2-color encoding */ + CHECK_STREAM_PTR(2); + + P0 = *s->stream_ptr++; + P1 = *s->stream_ptr++; + + if (P0 <= P1) { + + /* need 8 more bytes from the stream */ + CHECK_STREAM_PTR(8); + for (y = 0; y < 8; y++) + B[y] = *s->stream_ptr++; + + for (y = 0; y < 8; y++) { + flags = B[y]; + for (x = 0x01; x <= 0x80; x <<= 1) { + if (flags & x) + *s->pixel_ptr++ = P1; + else + *s->pixel_ptr++ = P0; + } + s->pixel_ptr += s->line_inc; + } + + } else { + + /* need 2 more bytes from the stream */ + CHECK_STREAM_PTR(2); + B[0] = *s->stream_ptr++; + B[1] = *s->stream_ptr++; + + flags = (B[1] << 8) | B[0]; + bitmask = 0x0001; + for (y = 0; y < 8; y += 2) { + for (x = 0; x < 8; x += 2, bitmask <<= 1) { + if (flags & bitmask) { + *(s->pixel_ptr + x) = P1; + *(s->pixel_ptr + x + 1) = P1; + *(s->pixel_ptr + s->stride + x) = P1; + *(s->pixel_ptr + s->stride + x + 1) = P1; + } else { + *(s->pixel_ptr + x) = P0; + *(s->pixel_ptr + x + 1) = P0; + *(s->pixel_ptr + s->stride + x) = P0; + *(s->pixel_ptr + s->stride + x + 1) = P0; + } + } + s->pixel_ptr += s->stride * 2; + } + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x8(IpvideoContext *s) +{ + int x, y; + unsigned char P[8]; + unsigned char B[8]; + unsigned int flags = 0; + unsigned int bitmask = 0; + unsigned char P0 = 0, P1 = 0; + int lower_half = 0; + + /* 2-color encoding for each 4x4 quadrant, or 2-color encoding on + * either top and bottom or left and right halves */ + CHECK_STREAM_PTR(2); + + P[0] = *s->stream_ptr++; + P[1] = *s->stream_ptr++; + + if (P[0] <= P[1]) { + + /* need 12 more bytes */ + CHECK_STREAM_PTR(12); + B[0] = *s->stream_ptr++; B[1] = *s->stream_ptr++; + P[2] = *s->stream_ptr++; P[3] = *s->stream_ptr++; + B[2] = *s->stream_ptr++; B[3] = 
*s->stream_ptr++; + P[4] = *s->stream_ptr++; P[5] = *s->stream_ptr++; + B[4] = *s->stream_ptr++; B[5] = *s->stream_ptr++; + P[6] = *s->stream_ptr++; P[7] = *s->stream_ptr++; + B[6] = *s->stream_ptr++; B[7] = *s->stream_ptr++; + + for (y = 0; y < 8; y++) { + + /* time to reload flags? */ + if (y == 0) { + flags = + ((B[0] & 0xF0) << 4) | ((B[4] & 0xF0) << 8) | + ((B[0] & 0x0F) ) | ((B[4] & 0x0F) << 4) | + ((B[1] & 0xF0) << 20) | ((B[5] & 0xF0) << 24) | + ((B[1] & 0x0F) << 16) | ((B[5] & 0x0F) << 20); + bitmask = 0x00000001; + lower_half = 0; /* still on top half */ + } else if (y == 4) { + flags = + ((B[2] & 0xF0) << 4) | ((B[6] & 0xF0) << 8) | + ((B[2] & 0x0F) ) | ((B[6] & 0x0F) << 4) | + ((B[3] & 0xF0) << 20) | ((B[7] & 0xF0) << 24) | + ((B[3] & 0x0F) << 16) | ((B[7] & 0x0F) << 20); + bitmask = 0x00000001; + lower_half = 2; + } + + for (x = 0; x < 8; x++, bitmask <<= 1) { + /* get the pixel values ready for this quadrant */ + if (x == 0) { + P0 = P[lower_half + 0]; + P1 = P[lower_half + 1]; + } else if (x == 4) { + P0 = P[lower_half + 4]; + P1 = P[lower_half + 5]; + } + + if (flags & bitmask) + *s->pixel_ptr++ = P1; + else + *s->pixel_ptr++ = P0; + } + s->pixel_ptr += s->line_inc; + } + + } else { + + /* need 10 more bytes */ + CHECK_STREAM_PTR(10); + B[0] = *s->stream_ptr++; B[1] = *s->stream_ptr++; + B[2] = *s->stream_ptr++; B[3] = *s->stream_ptr++; + P[2] = *s->stream_ptr++; P[3] = *s->stream_ptr++; + B[4] = *s->stream_ptr++; B[5] = *s->stream_ptr++; + B[6] = *s->stream_ptr++; B[7] = *s->stream_ptr++; + + if (P[2] <= P[3]) { + + /* vertical split; left & right halves are 2-color encoded */ + + for (y = 0; y < 8; y++) { + + /* time to reload flags? 
*/ + if (y == 0) { + flags = + ((B[0] & 0xF0) << 4) | ((B[4] & 0xF0) << 8) | + ((B[0] & 0x0F) ) | ((B[4] & 0x0F) << 4) | + ((B[1] & 0xF0) << 20) | ((B[5] & 0xF0) << 24) | + ((B[1] & 0x0F) << 16) | ((B[5] & 0x0F) << 20); + bitmask = 0x00000001; + } else if (y == 4) { + flags = + ((B[2] & 0xF0) << 4) | ((B[6] & 0xF0) << 8) | + ((B[2] & 0x0F) ) | ((B[6] & 0x0F) << 4) | + ((B[3] & 0xF0) << 20) | ((B[7] & 0xF0) << 24) | + ((B[3] & 0x0F) << 16) | ((B[7] & 0x0F) << 20); + bitmask = 0x00000001; + } + + for (x = 0; x < 8; x++, bitmask <<= 1) { + /* get the pixel values ready for this half */ + if (x == 0) { + P0 = P[0]; + P1 = P[1]; + } else if (x == 4) { + P0 = P[2]; + P1 = P[3]; + } + + if (flags & bitmask) + *s->pixel_ptr++ = P1; + else + *s->pixel_ptr++ = P0; + } + s->pixel_ptr += s->line_inc; + } + + } else { + + /* horizontal split; top & bottom halves are 2-color encoded */ + + for (y = 0; y < 8; y++) { + + flags = B[y]; + if (y == 0) { + P0 = P[0]; + P1 = P[1]; + } else if (y == 4) { + P0 = P[2]; + P1 = P[3]; + } + + for (bitmask = 0x01; bitmask <= 0x80; bitmask <<= 1) { + + if (flags & bitmask) + *s->pixel_ptr++ = P1; + else + *s->pixel_ptr++ = P0; + } + s->pixel_ptr += s->line_inc; + } + } + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0x9(IpvideoContext *s) +{ + int x, y; + unsigned char P[4]; + unsigned char B[4]; + unsigned int flags = 0; + int shifter = 0; + unsigned char pix; + + /* 4-color encoding */ + CHECK_STREAM_PTR(4); + + for (y = 0; y < 4; y++) + P[y] = *s->stream_ptr++; + + if ((P[0] <= P[1]) && (P[2] <= P[3])) { + + /* 1 of 4 colors for each pixel, need 16 more bytes */ + CHECK_STREAM_PTR(16); + + for (y = 0; y < 8; y++) { + /* get the next set of 8 2-bit flags */ + flags = (s->stream_ptr[1] << 8) | s->stream_ptr[0]; + s->stream_ptr += 2; + for (x = 0, shifter = 0; x < 8; x++, shifter += 2) { + *s->pixel_ptr++ = P[(flags >> shifter) & 0x03]; + } + s->pixel_ptr += s->line_inc; + } + + } else if ((P[0] <= P[1]) 
&& (P[2] > P[3])) { + + /* 1 of 4 colors for each 2x2 block, need 4 more bytes */ + CHECK_STREAM_PTR(4); + + B[0] = *s->stream_ptr++; + B[1] = *s->stream_ptr++; + B[2] = *s->stream_ptr++; + B[3] = *s->stream_ptr++; + flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0]; + shifter = 0; + + for (y = 0; y < 8; y += 2) { + for (x = 0; x < 8; x += 2, shifter += 2) { + pix = P[(flags >> shifter) & 0x03]; + *(s->pixel_ptr + x) = pix; + *(s->pixel_ptr + x + 1) = pix; + *(s->pixel_ptr + s->stride + x) = pix; + *(s->pixel_ptr + s->stride + x + 1) = pix; + } + s->pixel_ptr += s->stride * 2; + } + + } else if ((P[0] > P[1]) && (P[2] <= P[3])) { + + /* 1 of 4 colors for each 2x1 block, need 8 more bytes */ + CHECK_STREAM_PTR(8); + + for (y = 0; y < 8; y++) { + /* time to reload flags? */ + if ((y == 0) || (y == 4)) { + B[0] = *s->stream_ptr++; + B[1] = *s->stream_ptr++; + B[2] = *s->stream_ptr++; + B[3] = *s->stream_ptr++; + flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0]; + shifter = 0; + } + for (x = 0; x < 8; x += 2, shifter += 2) { + pix = P[(flags >> shifter) & 0x03]; + *(s->pixel_ptr + x) = pix; + *(s->pixel_ptr + x + 1) = pix; + } + s->pixel_ptr += s->stride; + } + + } else { + + /* 1 of 4 colors for each 1x2 block, need 8 more bytes */ + CHECK_STREAM_PTR(8); + + for (y = 0; y < 8; y += 2) { + /* time to reload flags? 
*/ + if ((y == 0) || (y == 4)) { + B[0] = *s->stream_ptr++; + B[1] = *s->stream_ptr++; + B[2] = *s->stream_ptr++; + B[3] = *s->stream_ptr++; + flags = (B[3] << 24) | (B[2] << 16) | (B[1] << 8) | B[0]; + shifter = 0; + } + for (x = 0; x < 8; x++, shifter += 2) { + pix = P[(flags >> shifter) & 0x03]; + *(s->pixel_ptr + x) = pix; + *(s->pixel_ptr + s->stride + x) = pix; + } + s->pixel_ptr += s->stride * 2; + } + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xA(IpvideoContext *s) +{ + int x, y; + unsigned char P[16]; + unsigned char B[16]; + int flags = 0; + int shifter = 0; + int index; + int split; + int lower_half; + + /* 4-color encoding for each 4x4 quadrant, or 4-color encoding on + * either top and bottom or left and right halves */ + CHECK_STREAM_PTR(4); + + for (y = 0; y < 4; y++) + P[y] = *s->stream_ptr++; + + if (P[0] <= P[1]) { + + /* 4-color encoding for each quadrant; need 28 more bytes */ + CHECK_STREAM_PTR(28); + + for (y = 0; y < 4; y++) + B[y] = *s->stream_ptr++; + for (y = 4; y < 16; y += 4) { + for (x = y; x < y + 4; x++) + P[x] = *s->stream_ptr++; + for (x = y; x < y + 4; x++) + B[x] = *s->stream_ptr++; + } + + for (y = 0; y < 8; y++) { + + lower_half = (y >= 4) ? 4 : 0; + flags = (B[y + 8] << 8) | B[y]; + + for (x = 0, shifter = 0; x < 8; x++, shifter += 2) { + split = (x >= 4) ? 
8 : 0; + index = split + lower_half + ((flags >> shifter) & 0x03); + *s->pixel_ptr++ = P[index]; + } + + s->pixel_ptr += s->line_inc; + } + + } else { + + /* 4-color encoding for either left and right or top and bottom + * halves; need 20 more bytes */ + CHECK_STREAM_PTR(20); + + for (y = 0; y < 8; y++) + B[y] = *s->stream_ptr++; + for (y = 4; y < 8; y++) + P[y] = *s->stream_ptr++; + for (y = 8; y < 16; y++) + B[y] = *s->stream_ptr++; + + if (P[4] <= P[5]) { + + /* block is divided into left and right halves */ + for (y = 0; y < 8; y++) { + + flags = (B[y + 8] << 8) | B[y]; + split = 0; + + for (x = 0, shifter = 0; x < 8; x++, shifter += 2) { + if (x == 4) + split = 4; + *s->pixel_ptr++ = P[split + ((flags >> shifter) & 0x03)]; + } + + s->pixel_ptr += s->line_inc; + } + + } else { + + /* block is divided into top and bottom halves */ + split = 0; + for (y = 0; y < 8; y++) { + + flags = (B[y * 2 + 1] << 8) | B[y * 2]; + if (y == 4) + split = 4; + + for (x = 0, shifter = 0; x < 8; x++, shifter += 2) + *s->pixel_ptr++ = P[split + ((flags >> shifter) & 0x03)]; + + s->pixel_ptr += s->line_inc; + } + } + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xB(IpvideoContext *s) +{ + int x, y; + + /* 64-color encoding (each pixel in block is a different color) */ + CHECK_STREAM_PTR(64); + + for (y = 0; y < 8; y++) { + for (x = 0; x < 8; x++) { + *s->pixel_ptr++ = *s->stream_ptr++; + } + s->pixel_ptr += s->line_inc; + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xC(IpvideoContext *s) +{ + int x, y; + unsigned char pix; + + /* 16-color block encoding: each 2x2 block is a different color */ + CHECK_STREAM_PTR(16); + + for (y = 0; y < 8; y += 2) { + for (x = 0; x < 8; x += 2) { + pix = *s->stream_ptr++; + *(s->pixel_ptr + x) = pix; + *(s->pixel_ptr + x + 1) = pix; + *(s->pixel_ptr + s->stride + x) = pix; + *(s->pixel_ptr + s->stride + x + 1) = pix; + } + s->pixel_ptr += s->stride * 2; + } + + /* report 
success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xD(IpvideoContext *s) +{ + int x, y; + unsigned char P[4]; + unsigned char index = 0; + + /* 4-color block encoding: each 4x4 block is a different color */ + CHECK_STREAM_PTR(4); + + for (y = 0; y < 4; y++) + P[y] = *s->stream_ptr++; + + for (y = 0; y < 8; y++) { + if (y < 4) + index = 0; + else + index = 2; + + for (x = 0; x < 8; x++) { + if (x == 4) + index++; + *s->pixel_ptr++ = P[index]; + } + s->pixel_ptr += s->line_inc; + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xE(IpvideoContext *s) +{ + int x, y; + unsigned char pix; + + /* 1-color encoding: the whole block is 1 solid color */ + CHECK_STREAM_PTR(1); + pix = *s->stream_ptr++; + + for (y = 0; y < 8; y++) { + for (x = 0; x < 8; x++) { + *s->pixel_ptr++ = pix; + } + s->pixel_ptr += s->line_inc; + } + + /* report success */ + return 0; +} + +static int ipvideo_decode_block_opcode_0xF(IpvideoContext *s) +{ + int x, y; + unsigned char sample0, sample1; + + /* dithered encoding */ + CHECK_STREAM_PTR(2); + sample0 = *s->stream_ptr++; + sample1 = *s->stream_ptr++; + + for (y = 0; y < 8; y++) { + for (x = 0; x < 8; x += 2) { + if (y & 1) { + *s->pixel_ptr++ = sample1; + *s->pixel_ptr++ = sample0; + } else { + *s->pixel_ptr++ = sample0; + *s->pixel_ptr++ = sample1; + } + } + s->pixel_ptr += s->line_inc; + } + + /* report success */ + return 0; +} + +static int (*ipvideo_decode_block[16])(IpvideoContext *s); + +static void ipvideo_decode_opcodes(IpvideoContext *s) +{ + int x, y; + int index = 0; + unsigned char opcode; + int ret; + int code_counts[16]; + static int frame = 0; + + debug_interplay("------------------ frame %d\n", frame); + frame++; + + for (x = 0; x < 16; x++) + code_counts[x] = 0; + + /* this is PAL8, so make the palette available */ + memcpy(s->current_frame.data[1], s->avctx->palctrl->palette, PALETTE_COUNT * 4); + + s->stride = s->current_frame.linesize[0]; + s->stream_ptr = s->buf + 14; /* 
data starts 14 bytes in */ + s->stream_end = s->buf + s->size; + s->line_inc = s->stride - 8; + s->upper_motion_limit_offset = (s->avctx->height - 8) * s->stride + + s->avctx->width - 8; + s->dsp = s->dsp; + + for (y = 0; y < (s->stride * s->avctx->height); y += s->stride * 8) { + for (x = y; x < y + s->avctx->width; x += 8) { + /* bottom nibble first, then top nibble (which makes it + * hard to use a GetBitcontext) */ + if (index & 1) + opcode = s->decoding_map[index >> 1] >> 4; + else + opcode = s->decoding_map[index >> 1] & 0xF; + index++; + + debug_interplay(" block @ (%3d, %3d): encoding 0x%X, data ptr @ %p\n", + x - y, y / s->stride, opcode, s->stream_ptr); + code_counts[opcode]++; + + s->pixel_ptr = s->current_frame.data[0] + x; + ret = ipvideo_decode_block[opcode](s); + if (ret != 0) { + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode problem on frame %d, @ block (%d, %d)\n", + frame, x - y, y / s->stride); + return; + } + } + } + if ((s->stream_ptr != s->stream_end) && + (s->stream_ptr + 1 != s->stream_end)) { + av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode finished with %td bytes left over\n", + s->stream_end - s->stream_ptr); + } +} + +static int ipvideo_decode_init(AVCodecContext *avctx) +{ + IpvideoContext *s = avctx->priv_data; + + s->avctx = avctx; + + if (s->avctx->palctrl == NULL) { + av_log(avctx, AV_LOG_ERROR, " Interplay video: palette expected.\n"); + return -1; + } + + avctx->pix_fmt = PIX_FMT_PAL8; + avctx->has_b_frames = 0; + dsputil_init(&s->dsp, avctx); + + /* decoding map contains 4 bits of information per 8x8 block */ + s->decoding_map_size = avctx->width * avctx->height / (8 * 8 * 2); + + /* assign block decode functions */ + ipvideo_decode_block[0x0] = ipvideo_decode_block_opcode_0x0; + ipvideo_decode_block[0x1] = ipvideo_decode_block_opcode_0x1; + ipvideo_decode_block[0x2] = ipvideo_decode_block_opcode_0x2; + ipvideo_decode_block[0x3] = ipvideo_decode_block_opcode_0x3; + ipvideo_decode_block[0x4] = 
ipvideo_decode_block_opcode_0x4; + ipvideo_decode_block[0x5] = ipvideo_decode_block_opcode_0x5; + ipvideo_decode_block[0x6] = ipvideo_decode_block_opcode_0x6; + ipvideo_decode_block[0x7] = ipvideo_decode_block_opcode_0x7; + ipvideo_decode_block[0x8] = ipvideo_decode_block_opcode_0x8; + ipvideo_decode_block[0x9] = ipvideo_decode_block_opcode_0x9; + ipvideo_decode_block[0xA] = ipvideo_decode_block_opcode_0xA; + ipvideo_decode_block[0xB] = ipvideo_decode_block_opcode_0xB; + ipvideo_decode_block[0xC] = ipvideo_decode_block_opcode_0xC; + ipvideo_decode_block[0xD] = ipvideo_decode_block_opcode_0xD; + ipvideo_decode_block[0xE] = ipvideo_decode_block_opcode_0xE; + ipvideo_decode_block[0xF] = ipvideo_decode_block_opcode_0xF; + + s->current_frame.data[0] = s->last_frame.data[0] = + s->second_last_frame.data[0] = NULL; + + return 0; +} + +static int ipvideo_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + IpvideoContext *s = avctx->priv_data; + AVPaletteControl *palette_control = avctx->palctrl; + + /* compressed buffer needs to be large enough to at least hold an entire + * decoding map */ + if (buf_size < s->decoding_map_size) + return buf_size; + + s->decoding_map = buf; + s->buf = buf + s->decoding_map_size; + s->size = buf_size - s->decoding_map_size; + + s->current_frame.reference = 3; + if (avctx->get_buffer(avctx, &s->current_frame)) { + av_log(avctx, AV_LOG_ERROR, " Interplay Video: get_buffer() failed\n"); + return -1; + } + + ipvideo_decode_opcodes(s); + + if (palette_control->palette_changed) { + palette_control->palette_changed = 0; + s->current_frame.palette_has_changed = 1; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = s->current_frame; + + /* shuffle frames */ + if (s->second_last_frame.data[0]) + avctx->release_buffer(avctx, &s->second_last_frame); + s->second_last_frame = s->last_frame; + s->last_frame = s->current_frame; + s->current_frame.data[0] = NULL; /* catch any access attempts */ + + /* 
report that the buffer was completely consumed */ + return buf_size; +} + +static int ipvideo_decode_end(AVCodecContext *avctx) +{ + IpvideoContext *s = avctx->priv_data; + + /* release the last frame */ + if (s->last_frame.data[0]) + avctx->release_buffer(avctx, &s->last_frame); + if (s->second_last_frame.data[0]) + avctx->release_buffer(avctx, &s->second_last_frame); + + return 0; +} + +AVCodec interplay_video_decoder = { + "interplayvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_INTERPLAY_VIDEO, + sizeof(IpvideoContext), + ipvideo_decode_init, + NULL, + ipvideo_decode_end, + ipvideo_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/mpeg4/src/libavcodec/jfdctfst.c b/mpeg4/src/libavcodec/jfdctfst.c new file mode 100644 index 0000000000000000000000000000000000000000..804fd5766ed3c3b24403f6b687051445569b1350 --- /dev/null +++ b/mpeg4/src/libavcodec/jfdctfst.c @@ -0,0 +1,305 @@ +/* + * jfdctfst.c + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a fast, not so accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on Arai, Agui, and Nakajima's algorithm for + * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in + * Japanese, but the algorithm is described in the Pennebaker & Mitchell + * JPEG textbook (see REFERENCES section in file README). The following code + * is based directly on figure 4-8 in P&M. + * While an 8-point DCT cannot be done in less than 11 multiplies, it is + * possible to arrange the computation so that many of the multiplies are + * simple scalings of the final outputs. 
These multiplies can then be + * folded into the multiplications or divisions by the JPEG quantization + * table entries. The AA&N method leaves only 5 multiplies and 29 adds + * to be done in the DCT itself. + * The primary disadvantage of this method is that with fixed-point math, + * accuracy is lost due to imprecise representation of the scaled + * quantization values. The smaller the quantization table entry, the less + * precise the scaled value, so this implementation does worse with high- + * quality-setting files than with low-quality ones. + */ + +/** + * @file jfdctfst.c + * Independent JPEG Group's fast AAN dct. + */ + +#include +#include +#include "common.h" +#include "dsputil.h" + +#define DCTSIZE 8 +#define GLOBAL(x) x +#define RIGHT_SHIFT(x, n) ((x) >> (n)) +#define SHIFT_TEMPS + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* Scaling decisions are generally the same as in the LL&M algorithm; + * see jfdctint.c for more details. However, we choose to descale + * (right shift) multiplication products as soon as they are formed, + * rather than carrying additional fractional bits into subsequent additions. + * This compromises accuracy slightly, but it lets us save a few shifts. + * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples) + * everywhere except in the multiplications proper; this saves a good deal + * of work on 16-bit-int machines. + * + * Again to save a few shifts, the intermediate results between pass 1 and + * pass 2 are not upscaled, but are represented only to integral precision. + * + * A final compromise is to represent the multiplicative constants to only + * 8 fractional bits, rather than 13. This saves some shifting work on some + * machines, and may also reduce the cost of multiplication (since there + * are fewer one-bits in the constants). 
+ */ + +#define CONST_BITS 8 + + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 8 +#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ +#else +#define FIX_0_382683433 FIX(0.382683433) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_707106781 FIX(0.707106781) +#define FIX_1_306562965 FIX(1.306562965) +#endif + + +/* We can gain a little more speed, with a further compromise in accuracy, + * by omitting the addition in a descaling shift. This yields an incorrectly + * rounded result half the time... + */ + +#ifndef USE_ACCURATE_ROUNDING +#undef DESCALE +#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#endif + + +/* Multiply a DCTELEM variable by an int32_t constant, and immediately + * descale to yield a DCTELEM result. + */ + +#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) + +static always_inline void row_fdct(DCTELEM * data){ + int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast16_t tmp10, tmp11, tmp12, tmp13; + int_fast16_t z1, z2, z3, z4, z5, z11, z13; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. 
*/ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = tmp10 + tmp11; /* phase 3 */ + dataptr[4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[3] = z13 - z2; + dataptr[1] = z11 + z4; + dataptr[7] = z11 - z4; + + dataptr += DCTSIZE; /* advance pointer to next row */ + } +} + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +fdct_ifast (DCTELEM * data) +{ + int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast16_t tmp10, tmp11, tmp12, tmp13; + int_fast16_t z1, z2, z3, z4, z5, z11, z13; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + row_fdct(data); + + /* Pass 2: process columns. 
*/ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE*6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE*3] = z13 - z2; + dataptr[DCTSIZE*1] = z11 + z4; + dataptr[DCTSIZE*7] = z11 - z4; + + dataptr++; /* advance pointer to next column */ + } +} + +/* + * Perform the forward 2-4-8 DCT on one block of samples. + */ + +GLOBAL(void) +fdct_ifast248 (DCTELEM * data) +{ + int_fast16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast16_t tmp10, tmp11, tmp12, tmp13; + int_fast16_t z1; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + row_fdct(data); + + /* Pass 2: process columns. 
*/ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + tmp13 = tmp0 - tmp3; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); + dataptr[DCTSIZE*2] = tmp13 + z1; + dataptr[DCTSIZE*6] = tmp13 - z1; + + tmp10 = tmp4 + tmp7; + tmp11 = tmp5 + tmp6; + tmp12 = tmp5 - tmp6; + tmp13 = tmp4 - tmp7; + + dataptr[DCTSIZE*1] = tmp10 + tmp11; + dataptr[DCTSIZE*5] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); + dataptr[DCTSIZE*3] = tmp13 + z1; + dataptr[DCTSIZE*7] = tmp13 - z1; + + dataptr++; /* advance pointer to next column */ + } +} + + +#undef GLOBAL +#undef CONST_BITS +#undef DESCALE +#undef FIX_0_541196100 +#undef FIX_1_306562965 diff --git a/mpeg4/src/libavcodec/jfdctint.c b/mpeg4/src/libavcodec/jfdctint.c new file mode 100644 index 0000000000000000000000000000000000000000..41d27499134e659edfb7dd109e1b69d4ac3d9ccd --- /dev/null +++ b/mpeg4/src/libavcodec/jfdctint.c @@ -0,0 +1,373 @@ +/* + * jfdctint.c + * + * Copyright (C) 1991-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a slow-but-accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. 
Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + */ + +/** + * @file jfdctint.c + * Independent JPEG Group's slow & accurate dct. + */ + +#include +#include +#include "common.h" +#include "dsputil.h" + +#define SHIFT_TEMPS +#define DCTSIZE 8 +#define BITS_IN_JSAMPLE 8 +#define GLOBAL(x) x +#define RIGHT_SHIFT(x, n) ((x) >> (n)) +#define MULTIPLY16C16(var,const) ((var)*(const)) + +#if 1 //def USE_ACCURATE_ROUNDING +#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n) +#else +#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#endif + + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* + * The poop on this scaling stuff is as follows: + * + * Each 1-D DCT step produces outputs which are a factor of sqrt(N) + * larger than the true DCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D DCT, + * because the y0 and y4 outputs need not be divided by sqrt(N). 
+ * In the IJG code, this factor of 8 is removed by the quantization step + * (in jcdctmgr.c), NOT in this module. + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (For 12-bit sample data, the intermediate + * array is int32_t anyway.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#if BITS_IN_JSAMPLE == 8 +#define CONST_BITS 13 +#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */ +#else +#define CONST_BITS 13 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) 
+ */ + +#if CONST_BITS == 13 +#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */ +#else +#define FIX_0_298631336 FIX(0.298631336) +#define FIX_0_390180644 FIX(0.390180644) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_765366865 FIX(0.765366865) +#define FIX_0_899976223 FIX(0.899976223) +#define FIX_1_175875602 FIX(1.175875602) +#define FIX_1_501321110 FIX(1.501321110) +#define FIX_1_847759065 FIX(1.847759065) +#define FIX_1_961570560 FIX(1.961570560) +#define FIX_2_053119869 FIX(2.053119869) +#define FIX_2_562915447 FIX(2.562915447) +#define FIX_3_072711026 FIX(3.072711026) +#endif + + +/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. + * For 12-bit samples, a full 32-bit multiplication will be needed. 
+ */ + +#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#else +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +static always_inline void row_fdct(DCTELEM * data){ + int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast32_t tmp10, tmp11, tmp12, tmp13; + int_fast32_t z1, z2, z3, z4, z5; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); + dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS-PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * cK represents cos(K*pi/16). + * i0..i3 in the paper are tmp4..tmp7 here. 
+ */ + + z1 = tmp4 + tmp7; + z2 = tmp5 + tmp6; + z3 = tmp4 + tmp6; + z4 = tmp5 + tmp7; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ + + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + + z3 += z5; + z4 += z5; + + dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } +} + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +ff_jpeg_fdct_islow (DCTELEM * data) +{ + int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast32_t tmp10, tmp11, tmp12, tmp13; + int_fast32_t z1, z2, z3, z4, z5; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + row_fdct(data); + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. 
+ */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS+PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * cK represents cos(K*pi/16). + * i0..i3 in the paper are tmp4..tmp7 here. 
+ */ + + z1 = tmp4 + tmp7; + z2 = tmp5 + tmp6; + z3 = tmp4 + tmp6; + z4 = tmp5 + tmp7; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ + + tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + + z3 += z5; + z4 += z5; + + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + +/* + * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT + * on the rows and then, instead of doing even and odd, part on the colums + * you do even part two times. + */ +GLOBAL(void) +ff_fdct248_islow (DCTELEM * data) +{ + int_fast32_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int_fast32_t tmp10, tmp11, tmp12, tmp13; + int_fast32_t z1; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + row_fdct(data); + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. 
+ */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + tmp13 = tmp0 - tmp3; + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS+PASS1_BITS); + + tmp10 = tmp4 + tmp7; + tmp11 = tmp5 + tmp6; + tmp12 = tmp5 - tmp6; + tmp13 = tmp4 - tmp7; + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} diff --git a/mpeg4/src/libavcodec/jpeg_ls.c b/mpeg4/src/libavcodec/jpeg_ls.c new file mode 100644 index 0000000000000000000000000000000000000000..4b365bb4adb5778f45a9faab6bacbcb857271927 --- /dev/null +++ b/mpeg4/src/libavcodec/jpeg_ls.c @@ -0,0 +1,843 @@ +/* + * JPEG-LS encoder and decoder + * Copyright (c) 2003 Michael Niedermayer + * Copyright (c) 2006 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the 
terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "golomb.h" + +/** + * @file jpeg_ls.c + * JPEG-LS encoder and decoder. + */ + +typedef struct JpeglsContext{ + AVCodecContext *avctx; + AVFrame picture; +}JpeglsContext; + +typedef struct JLSState{ + int T1, T2, T3; + int A[367], B[367], C[365], N[367]; + int limit, reset, bpp, qbpp, maxval, range; + int near, twonear; + int run_index[3]; +}JLSState; + +static const uint8_t log2_run[32]={ + 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, + 8, 9,10,11,12,13,14,15 +}; + +/* +* Uncomment this to significantly speed up decoding of broken JPEG-LS +* (or test broken JPEG-LS decoder) and slow down ordinary decoding a bit. +* +* There is no Golomb code with length >= 32 bits possible, so check and +* avoid situation of 32 zeros, FFmpeg Golomb decoder is painfully slow +* on this errors. 
+*/ +//#define JLS_BROKEN + +/********** Functions for both encoder and decoder **********/ + +/** + * Calculate initial JPEG-LS parameters + */ +static void ls_init_state(JLSState *state){ + int i; + + state->twonear = state->near * 2 + 1; + state->range = ((state->maxval + state->twonear - 1) / state->twonear) + 1; + + // QBPP = ceil(log2(RANGE)) + for(state->qbpp = 0; (1 << state->qbpp) < state->range; state->qbpp++); + + if(state->bpp < 8) + state->limit = 16 + 2 * state->bpp - state->qbpp; + else + state->limit = (4 * state->bpp) - state->qbpp; + + for(i = 0; i < 367; i++) { + state->A[i] = (state->range + 32) >> 6; + if(state->A[i] < 2) + state->A[i] = 2; + state->N[i] = 1; + } + +} + +/** + * Calculate quantized gradient value, used for context determination + */ +static inline int quantize(JLSState *s, int v){ //FIXME optimize + if(v==0) return 0; + if(v < 0){ + if(v <= -s->T3) return -4; + if(v <= -s->T2) return -3; + if(v <= -s->T1) return -2; + if(v < -s->near) return -1; + return 0; + }else{ + if(v <= s->near) return 0; + if(v < s->T1) return 1; + if(v < s->T2) return 2; + if(v < s->T3) return 3; + return 4; + } +} + +/** + * Custom value clipping function used in T1, T2, T3 calculation + */ +static inline int iso_clip(int v, int vmin, int vmax){ + if(v > vmax || v < vmin) return vmin; + else return v; +} + +/** + * Calculate JPEG-LS codec values + */ +static void reset_ls_coding_parameters(JLSState *s, int reset_all){ + const int basic_t1= 3; + const int basic_t2= 7; + const int basic_t3= 21; + int factor; + + if(s->maxval==0 || reset_all) s->maxval= (1 << s->bpp) - 1; + + if(s->maxval >=128){ + factor= (FFMIN(s->maxval, 4095) + 128)>>8; + + if(s->T1==0 || reset_all) + s->T1= iso_clip(factor*(basic_t1-2) + 2 + 3*s->near, s->near+1, s->maxval); + if(s->T2==0 || reset_all) + s->T2= iso_clip(factor*(basic_t2-3) + 3 + 5*s->near, s->T1, s->maxval); + if(s->T3==0 || reset_all) + s->T3= iso_clip(factor*(basic_t3-4) + 4 + 7*s->near, s->T2, s->maxval); + }else{ 
+ factor= 256 / (s->maxval + 1); + + if(s->T1==0 || reset_all) + s->T1= iso_clip(FFMAX(2, basic_t1/factor + 3*s->near), s->near+1, s->maxval); + if(s->T2==0 || reset_all) + s->T2= iso_clip(FFMAX(3, basic_t2/factor + 5*s->near), s->T1, s->maxval); + if(s->T3==0 || reset_all) + s->T3= iso_clip(FFMAX(4, basic_t3/factor + 6*s->near), s->T2, s->maxval); + } + + if(s->reset==0 || reset_all) s->reset= 64; +// av_log(NULL, AV_LOG_DEBUG, "[JPEG-LS RESET] T=%i,%i,%i\n", s->T1, s->T2, s->T3); +} + + +/********** Decoder-specific functions **********/ + +/** + * Decode LSE block with initialization parameters + */ +static int decode_lse(MJpegDecodeContext *s) +{ + int len, id; + + /* XXX: verify len field validity */ + len = get_bits(&s->gb, 16); + id = get_bits(&s->gb, 8); + + switch(id){ + case 1: + s->maxval= get_bits(&s->gb, 16); + s->t1= get_bits(&s->gb, 16); + s->t2= get_bits(&s->gb, 16); + s->t3= get_bits(&s->gb, 16); + s->reset= get_bits(&s->gb, 16); + +// reset_ls_coding_parameters(s, 0); + //FIXME quant table? 
+ break; + case 2: + case 3: + av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n"); + return -1; + case 4: + av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n"); + return -1; + default: + av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id); + return -1; + } +// av_log(s->avctx, AV_LOG_DEBUG, "ID=%i, T=%i,%i,%i\n", id, s->t1, s->t2, s->t3); + + return 0; +} + + +/** + * Get context-dependent Golomb code, decode it and update context + */ +static inline int ls_get_code_regular(GetBitContext *gb, JLSState *state, int Q){ + int k, ret; + + for(k = 0; (state->N[Q] << k) < state->A[Q]; k++); + +#ifdef JLS_BROKEN + if(!show_bits_long(gb, 32))return -1; +#endif + ret = get_ur_golomb_jpegls(gb, k, state->limit, state->qbpp); + + /* decode mapped error */ + if(ret & 1) + ret = -((ret + 1) >> 1); + else + ret >>= 1; + + /* for NEAR=0, k=0 and 2*B[Q] <= - N[Q] mapping is reversed */ + if(!state->near && !k && (2 * state->B[Q] <= -state->N[Q])) + ret = -(ret + 1); + + state->A[Q] += ABS(ret); + ret *= state->twonear; + state->B[Q] += ret; + + if(state->N[Q] == state->reset) { + state->A[Q] >>= 1; + state->B[Q] >>= 1; + state->N[Q] >>= 1; + } + state->N[Q]++; + + if(state->B[Q] <= -state->N[Q]) { + state->B[Q] += state->N[Q]; + if(state->C[Q] > -128) + state->C[Q]--; + if(state->B[Q] <= -state->N[Q]) + state->B[Q] = -state->N[Q] + 1; + }else if(state->B[Q] > 0){ + state->B[Q] -= state->N[Q]; + if(state->C[Q] < 127) + state->C[Q]++; + if(state->B[Q] > 0) + state->B[Q] = 0; + } + + return ret; +} + +/** + * Get Golomb code, decode it and update state for run termination + */ +static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RItype, int limit_add){ + int k, ret, temp, map; + int Q = 365 + RItype; + + if(!RItype) + temp = state->A[Q]; + else + temp = state->A[Q] + (state->N[Q] >> 1); + + for(k = 0; (state->N[Q] << k) < temp; k++); + +#ifdef JLS_BROKEN + if(!show_bits_long(gb, 32))return -1; +#endif + ret = get_ur_golomb_jpegls(gb, k, 
state->limit - limit_add - 1, state->qbpp); + + /* decode mapped error */ + map = 0; + if(!k && (RItype || ret) && (2 * state->B[Q] < state->N[Q])) + map = 1; + ret += RItype + map; + + if(ret & 1){ + ret = map - ((ret + 1) >> 1); + state->B[Q]++; + } else { + ret = ret >> 1; + } + + /* update state */ + state->A[Q] += ABS(ret) - RItype; + ret *= state->twonear; + if(state->N[Q] == state->reset){ + state->A[Q] >>=1; + state->B[Q] >>=1; + state->N[Q] >>=1; + } + state->N[Q]++; + + return ret; +} + +/** + * Decode one line of image + */ +static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, uint8_t *last, uint8_t *dst, int last2, int w, int stride, int comp){ + int i, x = 0; + int Ra, Rb, Rc, Rd; + int D0, D1, D2; + + while(x < w) { + int err, pred; + + /* compute gradients */ + Ra = x ? dst[x - stride] : last[x]; + Rb = last[x]; + Rc = x ? last[x - stride] : last2; + Rd = (x >= w - stride) ? last[x] : last[x + stride]; + D0 = Rd - Rb; + D1 = Rb - Rc; + D2 = Rc - Ra; + /* run mode */ + if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) { + int r; + int RItype; + + /* decode full runs while available */ + while(get_bits1(&s->gb)) { + int r; + r = 1 << log2_run[state->run_index[comp]]; + if(x + r * stride > w) { + r = (w - x) / stride; + } + for(i = 0; i < r; i++) { + dst[x] = Ra; + x += stride; + } + /* if EOL reached, we stop decoding */ + if(r != (1 << log2_run[state->run_index[comp]])) + return; + if(state->run_index[comp] < 31) + state->run_index[comp]++; + if(x + stride > w) + return; + } + /* decode aborted run */ + r = log2_run[state->run_index[comp]]; + if(r) + r = get_bits_long(&s->gb, r); + for(i = 0; i < r; i++) { + dst[x] = Ra; + x += stride; + } + + /* decode run termination value */ + Rb = last[x]; + RItype = (ABS(Ra - Rb) <= state->near) ? 
1 : 0; + err = ls_get_code_runterm(&s->gb, state, RItype, log2_run[state->run_index[comp]]); + if(state->run_index[comp]) + state->run_index[comp]--; + + if(state->near && RItype){ + pred = Ra + err; + } else { + if(Rb < Ra) + pred = Rb - err; + else + pred = Rb + err; + } + + if(state->near){ + if(pred < -state->near) + pred += state->range * state->twonear; + else if(pred > state->maxval + state->near) + pred -= state->range * state->twonear; + pred = clip(pred, 0, state->maxval); + } + + dst[x] = pred; + x += stride; + } else { /* regular mode */ + int context, sign; + + context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2); + pred = mid_pred(Ra, Ra + Rb - Rc, Rb); + + if(context < 0){ + context = -context; + sign = 1; + }else{ + sign = 0; + } + + if(sign){ + pred = clip(pred - state->C[context], 0, state->maxval); + err = -ls_get_code_regular(&s->gb, state, context); + } else { + pred = clip(pred + state->C[context], 0, state->maxval); + err = ls_get_code_regular(&s->gb, state, context); + } + + /* we have to do something more for near-lossless coding */ + pred += err; + if(state->near) { + if(pred < -state->near) + pred += state->range * state->twonear; + else if(pred > state->maxval + state->near) + pred -= state->range * state->twonear; + pred = clip(pred, 0, state->maxval); + } + + dst[x] = pred; + x += stride; + } + } +} + +static int ls_decode_picture(MJpegDecodeContext *s, int near, int point_transform, int ilv){ + int i, t = 0; + uint8_t *zero, *last, *cur; + JLSState *state; + int off, stride, width; + + zero = av_mallocz(s->picture.linesize[0]); + last = zero; + cur = s->picture.data[0]; + + state = av_mallocz(sizeof(JLSState)); + /* initialize JPEG-LS state from JPEG parameters */ + state->near = near; + state->bpp = (s->bits < 2) ? 
2 : s->bits; + state->maxval = s->maxval; + state->T1 = s->t1; + state->T2 = s->t2; + state->T3 = s->t3; + state->reset = s->reset; + reset_ls_coding_parameters(state, 0); + ls_init_state(state); + +// av_log(s->avctx, AV_LOG_DEBUG, "JPEG-LS params: %ix%i NEAR=%i MV=%i T(%i,%i,%i) RESET=%i, LIMIT=%i, qbpp=%i, RANGE=%i\n",s->width,s->height,state->near,state->maxval,state->T1,state->T2,state->T3,state->reset,state->limit,state->qbpp, state->range); +// av_log(s->avctx, AV_LOG_DEBUG, "JPEG params: ILV=%i Pt=%i BPP=%i, scan = %i\n", ilv, point_transform, s->bits, s->cur_scan); + if(ilv == 0) { /* separate planes */ + off = s->cur_scan - 1; + stride = (s->nb_components > 1) ? 3 : 1; + width = s->width * stride; + cur += off; + for(i = 0; i < s->height; i++) { + ls_decode_line(state, s, last, cur, t, width, stride, off); + t = last[0]; + last = cur; + cur += s->picture.linesize[0]; + + if (s->restart_interval && !--s->restart_count) { + align_get_bits(&s->gb); + skip_bits(&s->gb, 16); /* skip RSTn */ + } + } + } else if(ilv == 1) { /* line interleaving */ + int j; + int Rc[3] = {0, 0, 0}; + memset(cur, 0, s->picture.linesize[0]); + width = s->width * 3; + for(i = 0; i < s->height; i++) { + for(j = 0; j < 3; j++) { + ls_decode_line(state, s, last + j, cur + j, Rc[j], width, 3, j); + Rc[j] = last[j]; + + if (s->restart_interval && !--s->restart_count) { + align_get_bits(&s->gb); + skip_bits(&s->gb, 16); /* skip RSTn */ + } + } + last = cur; + cur += s->picture.linesize[0]; + } + } else if(ilv == 2) { /* sample interleaving */ + av_log(s->avctx, AV_LOG_ERROR, "Sample interleaved images are not supported.\n"); + return -1; + } + + av_free(state); + av_free(zero); + + return 0; +} + +#if defined(CONFIG_ENCODERS) && defined(CONFIG_JPEGLS_ENCODER) +/********** Encoder-specific functions **********/ + +/** + * Encode error from regular symbol + */ +static inline void ls_encode_regular(JLSState *state, PutBitContext *pb, int Q, int err){ + int k; + int val; + int map; + + for(k 
= 0; (state->N[Q] << k) < state->A[Q]; k++); + + map = !state->near && !k && (2 * state->B[Q] <= -state->N[Q]); + + if(err < 0) + err += state->range; + if(err >= ((state->range + 1) >> 1)) { + err -= state->range; + val = 2 * ABS(err) - 1 - map; + } else + val = 2 * err + map; + + set_ur_golomb_jpegls(pb, val, k, state->limit, state->qbpp); + + state->A[Q] += ABS(err); + state->B[Q] += err * state->twonear; + + if(state->N[Q] == state->reset) { + state->A[Q] >>= 1; + state->B[Q] >>= 1; + state->N[Q] >>= 1; + } + state->N[Q]++; + + if(state->B[Q] <= -state->N[Q]) { + state->B[Q] += state->N[Q]; + if(state->C[Q] > -128) + state->C[Q]--; + if(state->B[Q] <= -state->N[Q]) + state->B[Q] = -state->N[Q] + 1; + }else if(state->B[Q] > 0){ + state->B[Q] -= state->N[Q]; + if(state->C[Q] < 127) + state->C[Q]++; + if(state->B[Q] > 0) + state->B[Q] = 0; + } +} + +/** + * Encode error from run termination + */ +static inline void ls_encode_runterm(JLSState *state, PutBitContext *pb, int RItype, int err, int limit_add){ + int k; + int val, map; + int Q = 365 + RItype; + int temp; + + temp = state->A[Q]; + if(RItype) + temp += state->N[Q] >> 1; + for(k = 0; (state->N[Q] << k) < temp; k++); + map = 0; + if(!k && err && (2 * state->B[Q] < state->N[Q])) + map = 1; + + if(err < 0) + val = - (2 * err) - 1 - RItype + map; + else + val = 2 * err - RItype - map; + set_ur_golomb_jpegls(pb, val, k, state->limit - limit_add - 1, state->qbpp); + + if(err < 0) + state->B[Q]++; + state->A[Q] += (val + 1 - RItype) >> 1; + + if(state->N[Q] == state->reset) { + state->A[Q] >>= 1; + state->B[Q] >>= 1; + state->N[Q] >>= 1; + } + state->N[Q]++; +} + +/** + * Encode run value as specified by JPEG-LS standard + */ +static inline void ls_encode_run(JLSState *state, PutBitContext *pb, int run, int comp, int trail){ + while(run >= (1 << log2_run[state->run_index[comp]])){ + put_bits(pb, 1, 1); + run -= 1 << log2_run[state->run_index[comp]]; + if(state->run_index[comp] < 31) + state->run_index[comp]++; + } 
+ /* if hit EOL, encode another full run, else encode aborted run */ + if(!trail && run) { + put_bits(pb, 1, 1); + }else if(trail){ + put_bits(pb, 1, 0); + if(log2_run[state->run_index[comp]]) + put_bits(pb, log2_run[state->run_index[comp]], run); + } +} + +/** + * Encode one line of image + */ +static inline void ls_encode_line(JLSState *state, PutBitContext *pb, uint8_t *last, uint8_t *cur, int last2, int w, int stride, int comp){ + int x = 0; + int Ra, Rb, Rc, Rd; + int D0, D1, D2; + + while(x < w) { + int err, pred, sign; + + /* compute gradients */ + Ra = x ? cur[x - stride] : last[x]; + Rb = last[x]; + Rc = x ? last[x - stride] : last2; + Rd = (x >= w - stride) ? last[x] : last[x + stride]; + D0 = Rd - Rb; + D1 = Rb - Rc; + D2 = Rc - Ra; + + /* run mode */ + if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) { + int RUNval, RItype, run; + + run = 0; + RUNval = Ra; + while(x < w && (ABS(cur[x] - RUNval) <= state->near)){ + run++; + cur[x] = Ra; + x += stride; + } + ls_encode_run(state, pb, run, comp, x < w); + if(x >= w) + return; + Rb = last[x]; + RItype = (ABS(Ra - Rb) <= state->near); + pred = RItype ? 
Ra : Rb; + err = cur[x] - pred; + + if(!RItype && Ra > Rb) + err = -err; + + if(state->near){ + if(err > 0) + err = (state->near + err) / state->twonear; + else + err = -(state->near - err) / state->twonear; + + if(RItype || (Rb >= Ra)) + Ra = clip(pred + err * state->twonear, 0, state->maxval); + else + Ra = clip(pred - err * state->twonear, 0, state->maxval); + cur[x] = Ra; + } + if(err < 0) + err += state->range; + if(err >= ((state->range + 1) >> 1)) + err -= state->range; + + ls_encode_runterm(state, pb, RItype, err, log2_run[state->run_index[comp]]); + + if(state->run_index[comp] > 0) + state->run_index[comp]--; + x += stride; + } else { /* regular mode */ + int context; + + context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2); + pred = mid_pred(Ra, Ra + Rb - Rc, Rb); + + if(context < 0){ + context = -context; + sign = 1; + pred = clip(pred - state->C[context], 0, state->maxval); + err = pred - cur[x]; + }else{ + sign = 0; + pred = clip(pred + state->C[context], 0, state->maxval); + err = cur[x] - pred; + } + + if(state->near){ + if(err > 0) + err = (state->near + err) / state->twonear; + else + err = -(state->near - err) / state->twonear; + if(!sign) + Ra = clip(pred + err * state->twonear, 0, state->maxval); + else + Ra = clip(pred - err * state->twonear, 0, state->maxval); + cur[x] = Ra; + } + + ls_encode_regular(state, pb, context, err); + x += stride; + } + } +} + +static void ls_store_lse(JLSState *state, PutBitContext *pb){ + /* Test if we have default params and don't need to store LSE */ + JLSState state2; + memset(&state2, 0, sizeof(JLSState)); + state2.bpp = 8; + state2.near = state->near; + reset_ls_coding_parameters(&state2, 1); + if(state->T1 == state2.T1 && state->T2 == state2.T2 && state->T3 == state2.T3 && state->reset == state2.reset) + return; + /* store LSE type 1 */ + put_marker(pb, LSE); + put_bits(pb, 16, 13); + put_bits(pb, 8, 1); + put_bits(pb, 16, state->maxval); + put_bits(pb, 16, state->T1); + 
put_bits(pb, 16, state->T2); + put_bits(pb, 16, state->T3); + put_bits(pb, 16, state->reset); +} + +/** + * Encode one frame as a JPEG-LS image. + * + * The scan data is first written to a temporary buffer and then copied into + * buf with escaping: after a 0xFF byte only 7 bits are consumed, so the byte + * that follows it in the output always has its top bit clear. + * + * @param avctx codec context; pix_fmt, width, height and prediction_method + * (used as the NEAR value) are read + * @param buf output buffer + * @param buf_size size of buf in bytes; the temporary buffer gets the same size + * @param data AVFrame with the picture to encode + * @return number of bytes written to buf, or -1 if a temporary allocation fails + */ +static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + JpeglsContext * const s = avctx->priv_data; + AVFrame *pict = data; + AVFrame * const p= (AVFrame*)&s->picture; + const int near = avctx->prediction_method; + PutBitContext pb, pb2; + GetBitContext gb; + uint8_t *buf2, *zero, *cur, *last; + JLSState *state; + int i, size; + int comps; + + buf2 = av_malloc(buf_size); + if(!buf2){ + av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffers\n"); + return -1; + } + + init_put_bits(&pb, buf, buf_size); + init_put_bits(&pb2, buf2, buf_size); + + *p = *pict; + p->pict_type= FF_I_TYPE; + p->key_frame= 1; + + comps = (avctx->pix_fmt == PIX_FMT_GRAY8) ? 1 : 3; + + /* write our own JPEG header, can't use mjpeg_picture_header */ + put_marker(&pb, SOI); + put_marker(&pb, SOF48); + put_bits(&pb, 16, 8 + comps * 3); // header size depends on components + put_bits(&pb, 8, 8); // bpp + put_bits(&pb, 16, avctx->height); + put_bits(&pb, 16, avctx->width); + put_bits(&pb, 8, comps); // components + for(i = 1; i <= comps; i++) { + put_bits(&pb, 8, i); // component ID + put_bits(&pb, 8, 0x11); // subsampling: none + put_bits(&pb, 8, 0); // Tiq, used by JPEG-LS ext + } + + put_marker(&pb, SOS); + put_bits(&pb, 16, 6 + comps * 2); + put_bits(&pb, 8, comps); + for(i = 1; i <= comps; i++) { + put_bits(&pb, 8, i); // component ID + put_bits(&pb, 8, 0); // mapping index: none + } + put_bits(&pb, 8, near); + put_bits(&pb, 8, (comps > 1) ? 
1 : 0); // interleaving: 0 - plane, 1 - line + put_bits(&pb, 8, 0); // point transform: none + + /* allocate the coder state and the all-zero line used as the "previous line" of the first row; both are checked before use */ + state = av_mallocz(sizeof(JLSState)); + zero = av_mallocz(p->linesize[0]); + if(!state || !zero){ + av_log(avctx, AV_LOG_ERROR, "Cannot allocate temporary buffers\n"); + av_free(zero); + av_free(state); + av_free(buf2); + return -1; + } + /* initialize JPEG-LS state from JPEG parameters */ + state->near = near; + state->bpp = 8; + reset_ls_coding_parameters(state, 0); + ls_init_state(state); + + ls_store_lse(state, &pb); + + last = zero; + cur = p->data[0]; + if(avctx->pix_fmt == PIX_FMT_GRAY8){ + int t = 0; + + for(i = 0; i < avctx->height; i++) { + ls_encode_line(state, &pb2, last, cur, t, avctx->width, 1, 0); + t = last[0]; + last = cur; + cur += p->linesize[0]; + } + }else if(avctx->pix_fmt == PIX_FMT_RGB24){ + int j, width; + int Rc[3] = {0, 0, 0}; + + width = avctx->width * 3; + for(i = 0; i < avctx->height; i++) { + for(j = 0; j < 3; j++) { + ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j); + Rc[j] = last[j]; + } + last = cur; + cur += p->linesize[0]; /* p points at s->picture */ + } + }else if(avctx->pix_fmt == PIX_FMT_BGR24){ + int j, width; + int Rc[3] = {0, 0, 0}; + + width = avctx->width * 3; + for(i = 0; i < avctx->height; i++) { + for(j = 2; j >= 0; j--) { + ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j); + Rc[j] = last[j]; + } + last = cur; + cur += p->linesize[0]; /* p points at s->picture */ + } + } + + av_free(zero); + av_free(state); + + flush_put_bits(&pb2); + /* NOTE(review): nothing here checks that the headers plus the escaped scan fit in buf_size; confirm whether put_bits() guards against overrun */ + /* do escape coding */ + size = put_bits_count(&pb2) >> 3; + init_get_bits(&gb, buf2, size * 8); /* init_get_bits() takes the size in bits */ + while(get_bits_count(&gb) < size * 8){ + int v; + v = get_bits(&gb, 8); + put_bits(&pb, 8, v); + if(v == 0xFF){ + v = get_bits(&gb, 7); + put_bits(&pb, 8, v); + } + } + align_put_bits(&pb); + av_free(buf2); + + /* End of image */ + put_marker(&pb, EOI); + flush_put_bits(&pb); + + emms_c(); + + return put_bits_count(&pb) >> 3; +} + +/** + * Set up the JPEG-LS encoder: store ctx in the private context, expose the + * private picture as ctx->coded_frame and reject every pixel format other + * than GRAY8, RGB24 and BGR24. + * + * @return 0 on success, -1 for an unsupported pixel format + */ +static int encode_init_ls(AVCodecContext *ctx) { + JpeglsContext *c = (JpeglsContext*)ctx->priv_data; + + c->avctx = ctx; + ctx->coded_frame = &c->picture; + + if(ctx->pix_fmt != PIX_FMT_GRAY8 && ctx->pix_fmt != PIX_FMT_RGB24 && 
ctx->pix_fmt != PIX_FMT_BGR24){ + av_log(ctx, AV_LOG_ERROR, "Only grayscale and RGB24/BGR24 images are supported\n"); + return -1; + } + return 0; +} +/** + * Codec table entry for the JPEG-LS encoder: ties CODEC_ID_JPEGLS to + * encode_init_ls() and encode_picture_ls() and lists the accepted pixel + * formats (BGR24, RGB24, GRAY8; the list ends with -1). + */ +AVCodec jpegls_encoder = { //FIXME avoid MPV_* lossless jpeg shouldnt need them + "jpegls", + CODEC_TYPE_VIDEO, + CODEC_ID_JPEGLS, + sizeof(JpeglsContext), + encode_init_ls, + encode_picture_ls, + NULL, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_BGR24, PIX_FMT_RGB24, PIX_FMT_GRAY8, -1}, +}; +#endif diff --git a/mpeg4/src/libavcodec/jrevdct.c b/mpeg4/src/libavcodec/jrevdct.c new file mode 100644 index 0000000000000000000000000000000000000000..dc2ffaff71c4731fdb003a8a9b6f7111b80b5c6d --- /dev/null +++ b/mpeg4/src/libavcodec/jrevdct.c @@ -0,0 +1,1126 @@ +/* + * jrevdct.c + * + * Copyright (C) 1991, 1992, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains the basic inverse-DCT transformation subroutine. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * I've made lots of modifications to attempt to take advantage of the + * sparse nature of the DCT matrices we're getting. Although the logic + * is cumbersome, it's straightforward and the resulting code is much + * faster. + * + * A better way to do this would be to pass in the DCT block as a sparse + * matrix, perhaps with the difference cases encoded. 
+ */ + +/** + * @file jrevdct.c + * Independent JPEG Group's LLM idct. + */ + +#include "common.h" +#include "dsputil.h" + +#define EIGHT_BIT_SAMPLES + +#define DCTSIZE 8 +#define DCTSIZE2 64 + +#define GLOBAL + +#define RIGHT_SHIFT(x, n) ((x) >> (n)) + +typedef DCTELEM DCTBLOCK[DCTSIZE2]; + +#define CONST_BITS 13 + +/* + * This routine is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* + * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * The poop on this scaling stuff is as follows: + * + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) + * larger than the true IDCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D IDCT, + * because the y0 and y4 inputs need not be divided by sqrt(N). + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. 
+ * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (To scale up 12-bit sample data further, an + * intermediate int32 array would be needed.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#ifdef EIGHT_BIT_SAMPLES +#define PASS1_BITS 2 +#else +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +#define ONE ((int32_t) 1) + +#define CONST_SCALE (ONE << CONST_BITS) + +/* Convert a positive real constant to an integer scaled by CONST_SCALE. + * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, + * you will pay a significant penalty in run time. In that case, figure + * the correct integer constant values and insert them by hand. + */ + +/* Actually FIX is no longer used, we precomputed them all */ +#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5)) + +/* Descale and correctly round an int32_t value that's scaled by N bits. + * We assume RIGHT_SHIFT rounds towards minus infinity, so adding + * the fudge factor is correct for either sign of X. + */ + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) + +/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; + * this provides a useful speedup on many machines. + * There is no way to specify a 16x16->32 multiply in portable C, but + * some C compilers will do the right thing if you provide the correct + * combination of casts. 
+ * NB: for 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#ifdef EIGHT_BIT_SAMPLES +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const))) +#endif +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const))) +#endif +#endif + +#ifndef MULTIPLY /* default definition */ +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +/* + Unlike our decoder where we approximate the FIXes, we need to use exact +ones here or successive P-frames will drift too much with Reference frame coding +*/ +#define FIX_0_211164243 1730 +#define FIX_0_275899380 2260 +#define FIX_0_298631336 2446 +#define FIX_0_390180644 3196 +#define FIX_0_509795579 4176 +#define FIX_0_541196100 4433 +#define FIX_0_601344887 4926 +#define FIX_0_765366865 6270 +#define FIX_0_785694958 6436 +#define FIX_0_899976223 7373 +#define FIX_1_061594337 8697 +#define FIX_1_111140466 9102 +#define FIX_1_175875602 9633 +#define FIX_1_306562965 10703 +#define FIX_1_387039845 11363 +#define FIX_1_451774981 11893 +#define FIX_1_501321110 12299 +#define FIX_1_662939225 13623 +#define FIX_1_847759065 15137 +#define FIX_1_961570560 16069 +#define FIX_2_053119869 16819 +#define FIX_2_172734803 17799 +#define FIX_2_562915447 20995 +#define FIX_3_072711026 25172 + +/* + * Perform the inverse DCT on one block of coefficients. + * + * The 8x8 block is transformed in place in two passes: pass 1 works on the + * eight rows and leaves its results scaled up by 2**PASS1_BITS, pass 2 works + * on the eight columns and descales by CONST_BITS+PASS1_BITS+3 bits. + * + * Within a row the coefficients are read in the order 0,2,4,6,1,3,5,7, so + * data[] must already be permuted that way by the caller; the row results + * are written back in natural order. + * + * Rows whose AC terms are all zero are filled with the scaled DC value by + * storing through an int pointer, two DCTELEMs per store. + * NOTE(review): that shortcut assumes DCTELEM is 16 bits wide, int is 32 bits + * wide and each row is suitably aligned - confirm against dsputil.h. + */ + +void j_rev_dct(DCTBLOCK data) +{ + int32_t tmp0, tmp1, tmp2, tmp3; + int32_t tmp10, tmp11, tmp12, tmp13; + int32_t z1, z2, z3, z4, z5; + int32_t d0, d1, d2, d3, d4, d5, d6, d7; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. 
*/ + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + */ + + register int *idataptr = (int*)dataptr; + + /* WARNING: we do the same permutation as MMX idct to simplify the + video core */ + d0 = dataptr[0]; + d2 = dataptr[1]; + d4 = dataptr[2]; + d6 = dataptr[3]; + d1 = dataptr[4]; + d3 = dataptr[5]; + d5 = dataptr[6]; + d7 = dataptr[7]; + + if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + idataptr[2] = v; + idataptr[3] = v; + } + + dataptr += DCTSIZE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). 
*/ +{ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
+ */ + + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d5, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z4 = d5 + d1; + z5 = MULTIPLY(d7 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z3 = MULTIPLY(-d7, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z5 = MULTIPLY(d5 + d7, FIX_1_175875602); + 
+ z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d1, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + tmp2 = MULTIPLY(d3, FIX_0_509795579); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z5 = MULTIPLY(z3, FIX_1_175875602); + z3 = MULTIPLY(-z3, FIX_0_785694958); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX_1_175875602); + + z1 = MULTIPLY(z1, FIX_0_275899380); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp0 = MULTIPLY(-d7, FIX_1_662939225); + z4 = MULTIPLY(-d1, FIX_0_390180644); + tmp3 = MULTIPLY(d1, FIX_1_111140466); + + tmp0 += z1; + tmp1 = z4 + z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_1_387039845); + tmp1 = MULTIPLY(d7, FIX_1_175875602); + tmp2 = MULTIPLY(-d7, FIX_0_785694958); + tmp3 = MULTIPLY(d7, FIX_0_275899380); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX_1_175875602); + + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-d1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = 
MULTIPLY(-d3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_1_662939225); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z2 = MULTIPLY(-z2, FIX_1_387039845); + tmp2 = MULTIPLY(d3, FIX_1_111140466); + z3 = MULTIPLY(-d3, FIX_1_961570560); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX_1_175875602); + z1 = MULTIPLY(-d1, FIX_0_899976223); + tmp3 = MULTIPLY(d1, FIX_0_601344887); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(z4, FIX_0_785694958); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_0_275899380); + tmp2 = MULTIPLY(-d5, FIX_1_387039845); + tmp3 = MULTIPLY(d5, FIX_0_785694958); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX_0_211164243); + tmp2 = MULTIPLY(-d3, FIX_1_451774981); + z1 = MULTIPLY(d1, FIX_1_061594337); + z2 = MULTIPLY(-d3, FIX_2_172734803); + z4 = MULTIPLY(z5, FIX_0_785694958); + z5 = MULTIPLY(z5, FIX_1_175875602); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(-d3, FIX_0_785694958); + tmp1 = MULTIPLY(-d3, FIX_1_387039845); + tmp2 = MULTIPLY(-d3, FIX_0_275899380); + tmp3 = MULTIPLY(d3, FIX_1_175875602); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX_0_275899380); + tmp1 = MULTIPLY(d1, FIX_0_785694958); + tmp2 = MULTIPLY(d1, FIX_1_175875602); + tmp3 = MULTIPLY(d1, FIX_1_387039845); 
+ } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } +} + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSIZE*0]; + d1 = dataptr[DCTSIZE*1]; + d2 = dataptr[DCTSIZE*2]; + d3 = dataptr[DCTSIZE*3]; + d4 = dataptr[DCTSIZE*4]; + d5 = dataptr[DCTSIZE*5]; + d6 = dataptr[DCTSIZE*6]; + d7 = dataptr[DCTSIZE*7]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). 
*/ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 
+ */ + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7; + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d5, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5; + z3 = d7; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z3 = MULTIPLY(-d7, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z5 = 
MULTIPLY(d5 + d7, FIX_1_175875602); + + z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d1, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + tmp2 = MULTIPLY(d3, FIX_0_509795579); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z5 = MULTIPLY(z3, FIX_1_175875602); + z3 = MULTIPLY(-z3, FIX_0_785694958); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX_1_175875602); + + z1 = MULTIPLY(z1, FIX_0_275899380); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp0 = MULTIPLY(-d7, FIX_1_662939225); + z4 = MULTIPLY(-d1, FIX_0_390180644); + tmp3 = MULTIPLY(d1, FIX_1_111140466); + + tmp0 += z1; + tmp1 = z4 + z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_1_387039845); + tmp1 = MULTIPLY(d7, FIX_1_175875602); + tmp2 = MULTIPLY(-d7, FIX_0_785694958); + tmp3 = MULTIPLY(d7, FIX_0_275899380); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX_1_175875602); + + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-d1, FIX_0_899976223); + z2 = 
MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-d3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_1_662939225); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z2 = MULTIPLY(-z2, FIX_1_387039845); + tmp2 = MULTIPLY(d3, FIX_1_111140466); + z3 = MULTIPLY(-d3, FIX_1_961570560); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX_1_175875602); + z1 = MULTIPLY(-d1, FIX_0_899976223); + tmp3 = MULTIPLY(d1, FIX_0_601344887); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(z4, FIX_0_785694958); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_0_275899380); + tmp2 = MULTIPLY(-d5, FIX_1_387039845); + tmp3 = MULTIPLY(d5, FIX_0_785694958); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX_0_211164243); + tmp2 = MULTIPLY(-d3, FIX_1_451774981); + z1 = MULTIPLY(d1, FIX_1_061594337); + z2 = MULTIPLY(-d3, FIX_2_172734803); + z4 = MULTIPLY(z5, FIX_0_785694958); + z5 = MULTIPLY(z5, FIX_1_175875602); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(-d3, FIX_0_785694958); + tmp1 = MULTIPLY(-d3, FIX_1_387039845); + tmp2 = MULTIPLY(-d3, FIX_0_275899380); + tmp3 = MULTIPLY(d3, FIX_1_175875602); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX_0_275899380); + tmp1 = MULTIPLY(d1, FIX_0_785694958); + tmp2 = MULTIPLY(d1, FIX_1_175875602); 
+ tmp3 = MULTIPLY(d1, FIX_1_387039845); + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + +#undef DCTSIZE +#define DCTSIZE 4 +#define DCTSTRIDE 8 +/* + * 4x4 variant of j_rev_dct(). + * + * Works in place on the first 4 elements of the first 4 rows of the block; + * rows are DCTSTRIDE elements apart. Only the even-part butterfly is applied, + * with dataptr[0..3] taking the roles of d0, d2, d4 and d6. + * Pass 1 descales with DESCALE(), pass 2 with a plain right shift. + */ +void j_rev_dct4(DCTBLOCK data) +{ + int32_t tmp0, tmp1, tmp2, tmp3; + int32_t tmp10, tmp11, tmp12, tmp13; + int32_t z1; + int32_t d0, d2, d4, d6; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + data[0] += 4; /* NOTE(review): presumably a rounding bias for the truncating shift in pass 2 - confirm */ + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. 
+ */ + + register int *idataptr = (int*)dataptr; + + d0 = dataptr[0]; + d2 = dataptr[1]; + d4 = dataptr[2]; + d6 = dataptr[3]; + + if ((d2 | d4 | d6) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + } + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) 
DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSTRIDE*0]; + d2 = dataptr[DCTSTRIDE*1]; + d4 = dataptr[DCTSTRIDE*2]; + d6 = dataptr[DCTSTRIDE*3]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + 
d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + +void j_rev_dct2(DCTBLOCK data){ + int d00, d01, d10, d11; + + data[0] += 4; + d00 = data[0+0*DCTSTRIDE] + data[1+0*DCTSTRIDE]; + d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; + d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; + d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; + + data[0+0*DCTSTRIDE]= (d00 + d10)>>3; + data[1+0*DCTSTRIDE]= (d01 + d11)>>3; + data[0+1*DCTSTRIDE]= (d00 - d10)>>3; + data[1+1*DCTSTRIDE]= (d01 - d11)>>3; +} + +void j_rev_dct1(DCTBLOCK data){ + data[0] = (data[0] + 4)>>3; +} + +#undef FIX +#undef CONST_BITS diff --git a/mpeg4/src/libavcodec/kmvc.c b/mpeg4/src/libavcodec/kmvc.c new file mode 100644 index 0000000000000000000000000000000000000000..333c909aa8244106113545f96ca32c36e8f0c03f --- /dev/null +++ b/mpeg4/src/libavcodec/kmvc.c @@ -0,0 +1,394 @@ +/* + * KMVC decoder + * Copyright (c) 2006 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/** + * @file kmvc.c + * Karl Morton's Video Codec decoder + */ + +#include +#include + +#include "common.h" +#include "avcodec.h" + +#define KMVC_KEYFRAME 0x80 +#define KMVC_PALETTE 0x40 +#define KMVC_METHOD 0x0F + +/* + * Decoder context + */ +typedef struct KmvcContext { + AVCodecContext *avctx; + AVFrame pic; + + int setpal; + int palsize; + uint32_t pal[256]; + uint8_t *cur, *prev; + uint8_t *frm0, *frm1; +} KmvcContext; + +typedef struct BitBuf { + int bits; + int bitbuf; +} BitBuf; + +#define BLK(data, x, y) data[(x) + (y) * 320] + +#define kmvc_init_getbits(bb, src) bb.bits = 7; bb.bitbuf = *src++; + +#define kmvc_getbit(bb, src, res) {\ + res = 0; \ + if (bb.bitbuf & (1 << bb.bits)) res = 1; \ + bb.bits--; \ + if(bb.bits == -1) { \ + bb.bitbuf = *src++; \ + bb.bits = 7; \ + } \ +} + +static void kmvc_decode_intra_8x8(KmvcContext * ctx, uint8_t * src, int w, int h) +{ + BitBuf bb; + int res, val; + int i, j; + int bx, by; + int l0x, l1x, l0y, l1y; + int mx, my; + + kmvc_init_getbits(bb, src); + + for (by = 0; by < h; by += 8) + for (bx = 0; bx < w; bx += 8) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 8x8 block + val = *src++; + for (i = 0; i < 64; i++) + BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) = val; + } else { // handle four 4x4 subblocks + for (i = 0; i < 4; i++) { + l0x = bx + (i & 1) * 4; + l0y = by + (i & 2) * 2; + kmvc_getbit(bb, src, res); + if (!res) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 4x4 block + val = *src++; + for (j = 0; j < 16; j++) + BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = val; + } else { // copy block from already decoded place + val = *src++; + mx = val & 0xF; + my = val >> 4; + for (j = 0; j < 16; j++) + BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = + 
BLK(ctx->cur, l0x + (j & 3) - mx, l0y + (j >> 2) - my); + } + } else { // descend to 2x2 sub-sub-blocks + for (j = 0; j < 4; j++) { + l1x = l0x + (j & 1) * 2; + l1y = l0y + (j & 2); + kmvc_getbit(bb, src, res); + if (!res) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 2x2 block + val = *src++; + BLK(ctx->cur, l1x, l1y) = val; + BLK(ctx->cur, l1x + 1, l1y) = val; + BLK(ctx->cur, l1x, l1y + 1) = val; + BLK(ctx->cur, l1x + 1, l1y + 1) = val; + } else { // copy block from already decoded place + val = *src++; + mx = val & 0xF; + my = val >> 4; + BLK(ctx->cur, l1x, l1y) = BLK(ctx->cur, l1x - mx, l1y - my); + BLK(ctx->cur, l1x + 1, l1y) = + BLK(ctx->cur, l1x + 1 - mx, l1y - my); + BLK(ctx->cur, l1x, l1y + 1) = + BLK(ctx->cur, l1x - mx, l1y + 1 - my); + BLK(ctx->cur, l1x + 1, l1y + 1) = + BLK(ctx->cur, l1x + 1 - mx, l1y + 1 - my); + } + } else { // read values for block + BLK(ctx->cur, l1x, l1y) = *src++; + BLK(ctx->cur, l1x + 1, l1y) = *src++; + BLK(ctx->cur, l1x, l1y + 1) = *src++; + BLK(ctx->cur, l1x + 1, l1y + 1) = *src++; + } + } + } + } + } + } +} + +static void kmvc_decode_inter_8x8(KmvcContext * ctx, uint8_t * src, int w, int h) +{ + BitBuf bb; + int res, val; + int i, j; + int bx, by; + int l0x, l1x, l0y, l1y; + int mx, my; + + kmvc_init_getbits(bb, src); + + for (by = 0; by < h; by += 8) + for (bx = 0; bx < w; bx += 8) { + kmvc_getbit(bb, src, res); + if (!res) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 8x8 block + val = *src++; + for (i = 0; i < 64; i++) + BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) = val; + } else { // copy block from previous frame + for (i = 0; i < 64; i++) + BLK(ctx->cur, bx + (i & 0x7), by + (i >> 3)) = + BLK(ctx->prev, bx + (i & 0x7), by + (i >> 3)); + } + } else { // handle four 4x4 subblocks + for (i = 0; i < 4; i++) { + l0x = bx + (i & 1) * 4; + l0y = by + (i & 2) * 2; + kmvc_getbit(bb, src, res); + if (!res) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 4x4 block + val = *src++; + for (j = 0; 
j < 16; j++) + BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = val; + } else { // copy block + val = *src++; + mx = (val & 0xF) - 8; + my = (val >> 4) - 8; + for (j = 0; j < 16; j++) + BLK(ctx->cur, l0x + (j & 3), l0y + (j >> 2)) = + BLK(ctx->prev, l0x + (j & 3) + mx, l0y + (j >> 2) + my); + } + } else { // descend to 2x2 sub-sub-blocks + for (j = 0; j < 4; j++) { + l1x = l0x + (j & 1) * 2; + l1y = l0y + (j & 2); + kmvc_getbit(bb, src, res); + if (!res) { + kmvc_getbit(bb, src, res); + if (!res) { // fill whole 2x2 block + val = *src++; + BLK(ctx->cur, l1x, l1y) = val; + BLK(ctx->cur, l1x + 1, l1y) = val; + BLK(ctx->cur, l1x, l1y + 1) = val; + BLK(ctx->cur, l1x + 1, l1y + 1) = val; + } else { // copy block + val = *src++; + mx = (val & 0xF) - 8; + my = (val >> 4) - 8; + BLK(ctx->cur, l1x, l1y) = BLK(ctx->prev, l1x + mx, l1y + my); + BLK(ctx->cur, l1x + 1, l1y) = + BLK(ctx->prev, l1x + 1 + mx, l1y + my); + BLK(ctx->cur, l1x, l1y + 1) = + BLK(ctx->prev, l1x + mx, l1y + 1 + my); + BLK(ctx->cur, l1x + 1, l1y + 1) = + BLK(ctx->prev, l1x + 1 + mx, l1y + 1 + my); + } + } else { // read values for block + BLK(ctx->cur, l1x, l1y) = *src++; + BLK(ctx->cur, l1x + 1, l1y) = *src++; + BLK(ctx->cur, l1x, l1y + 1) = *src++; + BLK(ctx->cur, l1x + 1, l1y + 1) = *src++; + } + } + } + } + } + } +} + +static int decode_frame(AVCodecContext * avctx, void *data, int *data_size, uint8_t * buf, + int buf_size) +{ + KmvcContext *const ctx = (KmvcContext *) avctx->priv_data; + uint8_t *out, *src; + int i; + int header; + int blocksize; + + if (ctx->pic.data[0]) + avctx->release_buffer(avctx, &ctx->pic); + + ctx->pic.reference = 1; + ctx->pic.buffer_hints = FF_BUFFER_HINTS_VALID; + if (avctx->get_buffer(avctx, &ctx->pic) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + header = buf[0]; + buf++; + + if (header & KMVC_KEYFRAME) { + ctx->pic.key_frame = 1; + ctx->pic.pict_type = FF_I_TYPE; + } else { + ctx->pic.key_frame = 0; + ctx->pic.pict_type = FF_P_TYPE; + 
} + + if (header & KMVC_PALETTE) { + ctx->pic.palette_has_changed = 1; + // palette starts from index 1 and has 127 entries + for (i = 1; i <= ctx->palsize; i++) { + ctx->pal[i] = (buf[0] << 16) | (buf[1] << 8) | buf[2]; + buf += 3; + } + } + + if (ctx->setpal) { + ctx->setpal = 0; + ctx->pic.palette_has_changed = 1; + } + + /* make the palette available on the way out */ + memcpy(ctx->pic.data[1], ctx->pal, 1024); + + blocksize = *buf++; + + if (blocksize != 8) { + av_log(avctx, AV_LOG_ERROR, "Block size = %i\n", blocksize); + return -1; + } + memset(ctx->cur, 0, 320 * 200); + switch (header & KMVC_METHOD) { + case 3: + kmvc_decode_intra_8x8(ctx, buf, avctx->width, avctx->height); + break; + case 4: + kmvc_decode_inter_8x8(ctx, buf, avctx->width, avctx->height); + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unknown compression method %i\n", header & KMVC_METHOD); + return -1; + } + + out = ctx->pic.data[0]; + src = ctx->cur; + for (i = 0; i < avctx->height; i++) { + memcpy(out, src, avctx->width); + src += 320; + out += ctx->pic.linesize[0]; + } + + /* flip buffers */ + if (ctx->cur == ctx->frm0) { + ctx->cur = ctx->frm1; + ctx->prev = ctx->frm0; + } else { + ctx->cur = ctx->frm0; + ctx->prev = ctx->frm1; + } + + *data_size = sizeof(AVFrame); + *(AVFrame *) data = ctx->pic; + + /* always report that the buffer was completely consumed */ + return buf_size; +} + + + +/* + * Init kmvc decoder + */ +static int decode_init(AVCodecContext * avctx) +{ + KmvcContext *const c = (KmvcContext *) avctx->priv_data; + int i; + + c->avctx = avctx; + avctx->has_b_frames = 0; + + c->pic.data[0] = NULL; + + if (avctx->width > 320 || avctx->height > 200) { + av_log(avctx, AV_LOG_ERROR, "KMVC supports frames <= 320x200\n"); + return -1; + } + + c->frm0 = av_mallocz(320 * 200); + c->frm1 = av_mallocz(320 * 200); + c->cur = c->frm0; + c->prev = c->frm1; + + for (i = 0; i < 256; i++) { + c->pal[i] = i * 0x10101; + } + + if (avctx->extradata_size < 12) { + av_log(NULL, 0, "Extradata 
missing, decoding may not work properly...\n"); + c->palsize = 127; + } else { + c->palsize = LE_16(avctx->extradata + 10); + } + + if (avctx->extradata_size == 1036) { // palette in extradata + uint8_t *src = avctx->extradata + 12; + for (i = 0; i < c->palsize; i++) { + c->pal[i] = LE_32(src); + src += 4; + } + c->setpal = 1; + } + + avctx->pix_fmt = PIX_FMT_PAL8; + + return 0; +} + + + +/* + * Uninit kmvc decoder + */ +static int decode_end(AVCodecContext * avctx) +{ + KmvcContext *const c = (KmvcContext *) avctx->priv_data; + + if (c->frm0) + av_free(c->frm0); + if (c->frm1) + av_free(c->frm1); + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + + return 0; +} + +AVCodec kmvc_decoder = { + "kmvc", + CODEC_TYPE_VIDEO, + CODEC_ID_KMVC, + sizeof(KmvcContext), + decode_init, + NULL, + decode_end, + decode_frame +}; diff --git a/mpeg4/src/libavcodec/lcl.c b/mpeg4/src/libavcodec/lcl.c new file mode 100644 index 0000000000000000000000000000000000000000..0bc118af20ca18b6f529ce050ddcdb9d63b0b217 --- /dev/null +++ b/mpeg4/src/libavcodec/lcl.c @@ -0,0 +1,922 @@ +/* + * LCL (LossLess Codec Library) Codec + * Copyright (c) 2002-2004 Roberto Togni + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
/*
 *
 * Helper functions
 *
 */

/*
 * Round a fixed-point value with 20 fractional bits to the nearest integer
 * and clamp the result to 0..255.
 */
static inline unsigned char fix (int pix14)
{
    int tmp;

    tmp = (pix14 + 0x80000) >> 20;
    if (tmp < 0)
        return 0;
    if (tmp > 255)
        return 255;
    return tmp;
}



/* Blue sample from luma yq and the signed chroma difference bq. */
static inline unsigned char get_b (unsigned char yq, signed char bq)
{
    return fix((yq << 20) + bq * 1858076);
}



/* Green sample from luma yq and both signed chroma differences. */
static inline unsigned char get_g (unsigned char yq, signed char bq, signed char rq)
{
    return fix((yq << 20) - bq * 360857 - rq * 748830);
}



/* Red sample from luma yq and the signed chroma difference rq. */
static inline unsigned char get_r (unsigned char yq, signed char rq)
{
    return fix((yq << 20) + rq * 1470103);
}



/*
 * Decompress one MSZH stream.
 *
 * The stream is a sequence of mask bytes, each followed by up to eight
 * items selected by the mask bits, most significant bit first:
 *   bit clear: four literal bytes are copied from the input;
 *   bit set:   a two-byte little-endian code follows; its low 11 bits are
 *              a backwards offset into the output and its high 5 bits,
 *              plus one, give the number of 4-byte groups to copy from
 *              there.  The copy is done byte by byte, so source and
 *              destination may overlap.
 *
 * Compared with the previous implementation:
 *   - the literal copy uses memcpy() instead of an int-sized store, so it
 *     no longer depends on alignment or on sizeof(int) == 4;
 *   - decoding stops when the input does not hold a complete item, instead
 *     of reading past srclen;
 *   - a backwards offset larger than the amount of output produced so far
 *     is clamped to that amount, so nothing before destptr is ever read
 *     (an offset of 0 leaves the destination bytes as they are);
 *   - bounds are compared as sizes rather than by forming pointers past
 *     the end of the output buffer.
 *
 * @param srcptr   compressed input
 * @param srclen   number of input bytes available
 * @param destptr  output buffer
 * @param destsize size of the output buffer in bytes
 * @return number of bytes written to destptr
 */
static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destptr, unsigned int destsize)
{
    unsigned char *destptr_bak = destptr;
    unsigned char *destptr_end = destptr + destsize;
    unsigned char mask = 0;
    unsigned char maskbit = 0;
    unsigned int ofs, cnt;
    unsigned int room, written;

    while ((srclen > 0) && (destptr < destptr_end)) {
        if (maskbit == 0) {
            mask = *(srcptr++);
            maskbit = 8;
            srclen--;
            continue;
        }

        room = (unsigned int)(destptr_end - destptr);

        if ((mask & (1 << (--maskbit))) == 0) {
            /* four literal bytes */
            if (srclen < 4 || room < 4)
                break;
            memcpy(destptr, srcptr, 4);
            srclen -= 4;
            destptr += 4;
            srcptr += 4;
        } else {
            /* back-reference */
            if (srclen < 2)
                break;
            ofs = srcptr[0];
            cnt = srcptr[1];
            srcptr += 2;
            srclen -= 2;

            ofs = (ofs + cnt * 256) & 0x7ff;
            cnt = (((cnt >> 3) & 0x1f) + 1) * 4;

            written = (unsigned int)(destptr - destptr_bak);
            if (ofs > written)
                ofs = written;
            if (cnt > room)
                cnt = room;

            for (; cnt > 0; cnt--) {
                *destptr = *(destptr - ofs);
                destptr++;
            }
        }
    }

    return (unsigned int)(destptr - destptr_bak);
}
mthread_inlen, mthread_outlen; +#ifdef CONFIG_ZLIB + int zret; // Zlib return code +#endif + unsigned int len = buf_size; + + if(c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + + c->pic.reference = 0; + c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; + if(avctx->get_buffer(avctx, &c->pic) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + outptr = c->pic.data[0]; // Output image pointer + + /* Decompress frame */ + switch (avctx->codec_id) { + case CODEC_ID_MSZH: + switch (c->compression) { + case COMP_MSZH: + if (c->flags & FLAG_MULTITHREAD) { + mthread_inlen = *((unsigned int*)encoded); + mthread_outlen = *((unsigned int*)(encoded+4)); + if (mthread_outlen > c->decomp_size) // this should not happen + mthread_outlen = c->decomp_size; + mszh_dlen = mszh_decomp(encoded + 8, mthread_inlen, c->decomp_buf, c->decomp_size); + if (mthread_outlen != mszh_dlen) { + av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%d != %d)\n", + mthread_outlen, mszh_dlen); + return -1; + } + mszh_dlen = mszh_decomp(encoded + 8 + mthread_inlen, len - mthread_inlen, + c->decomp_buf + mthread_outlen, c->decomp_size - mthread_outlen); + if (mthread_outlen != mszh_dlen) { + av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %d)\n", + mthread_outlen, mszh_dlen); + return -1; + } + encoded = c->decomp_buf; + len = c->decomp_size; + } else { + mszh_dlen = mszh_decomp(encoded, len, c->decomp_buf, c->decomp_size); + if (c->decomp_size != mszh_dlen) { + av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %d)\n", + c->decomp_size, mszh_dlen); + return -1; + } + encoded = c->decomp_buf; + len = mszh_dlen; + } + break; + case COMP_MSZH_NOCOMP: + break; + default: + av_log(avctx, AV_LOG_ERROR, "BUG! 
Unknown MSZH compression in frame decoder.\n"); + return -1; + } + break; + case CODEC_ID_ZLIB: +#ifdef CONFIG_ZLIB + /* Using the original dll with normal compression (-1) and RGB format + * gives a file with ZLIB fourcc, but frame is really uncompressed. + * To be sure that's true check also frame size */ + if ((c->compression == COMP_ZLIB_NORMAL) && (c->imgtype == IMGTYPE_RGB24) && + (len == width * height * 3)) + break; + zret = inflateReset(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret); + return -1; + } + if (c->flags & FLAG_MULTITHREAD) { + mthread_inlen = *((unsigned int*)encoded); + mthread_outlen = *((unsigned int*)(encoded+4)); + if (mthread_outlen > c->decomp_size) + mthread_outlen = c->decomp_size; + c->zstream.next_in = encoded + 8; + c->zstream.avail_in = mthread_inlen; + c->zstream.next_out = c->decomp_buf; + c->zstream.avail_out = c->decomp_size; + zret = inflate(&(c->zstream), Z_FINISH); + if ((zret != Z_OK) && (zret != Z_STREAM_END)) { + av_log(avctx, AV_LOG_ERROR, "Mthread1 inflate error: %d\n", zret); + return -1; + } + if (mthread_outlen != (unsigned int)(c->zstream.total_out)) { + av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%u != %lu)\n", + mthread_outlen, c->zstream.total_out); + return -1; + } + zret = inflateReset(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate reset error: %d\n", zret); + return -1; + } + c->zstream.next_in = encoded + 8 + mthread_inlen; + c->zstream.avail_in = len - mthread_inlen; + c->zstream.next_out = c->decomp_buf + mthread_outlen; + c->zstream.avail_out = c->decomp_size - mthread_outlen; + zret = inflate(&(c->zstream), Z_FINISH); + if ((zret != Z_OK) && (zret != Z_STREAM_END)) { + av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate error: %d\n", zret); + return -1; + } + if (mthread_outlen != (unsigned int)(c->zstream.total_out)) { + av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %lu)\n", + 
mthread_outlen, c->zstream.total_out); + return -1; + } + } else { + c->zstream.next_in = encoded; + c->zstream.avail_in = len; + c->zstream.next_out = c->decomp_buf; + c->zstream.avail_out = c->decomp_size; + zret = inflate(&(c->zstream), Z_FINISH); + if ((zret != Z_OK) && (zret != Z_STREAM_END)) { + av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", zret); + return -1; + } + if (c->decomp_size != (unsigned int)(c->zstream.total_out)) { + av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %lu)\n", + c->decomp_size, c->zstream.total_out); + return -1; + } + } + encoded = c->decomp_buf; + len = c->decomp_size;; +#else + av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n"); + return -1; +#endif + break; + default: + av_log(avctx, AV_LOG_ERROR, "BUG! Unknown codec in frame decoder compression switch.\n"); + return -1; + } + + + /* Apply PNG filter */ + if ((avctx->codec_id == CODEC_ID_ZLIB) && (c->flags & FLAG_PNGFILTER)) { + switch (c->imgtype) { + case IMGTYPE_YUV111: + case IMGTYPE_RGB24: + for (row = 0; row < height; row++) { + pixel_ptr = row * width * 3; + yq = encoded[pixel_ptr++]; + uqvq = encoded[pixel_ptr++]; + uqvq+=(encoded[pixel_ptr++] << 8); + for (col = 1; col < width; col++) { + encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; + uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8)); + encoded[pixel_ptr+1] = (uqvq) & 0xff; + encoded[pixel_ptr+2] = ((uqvq)>>8) & 0xff; + pixel_ptr += 3; + } + } + break; + case IMGTYPE_YUV422: + for (row = 0; row < height; row++) { + pixel_ptr = row * width * 2; + yq = uq = vq =0; + for (col = 0; col < width/4; col++) { + encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; + encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1]; + encoded[pixel_ptr+2] = yq -= encoded[pixel_ptr+2]; + encoded[pixel_ptr+3] = yq -= encoded[pixel_ptr+3]; + encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4]; + encoded[pixel_ptr+5] = uq -= encoded[pixel_ptr+5]; + encoded[pixel_ptr+6] = vq -= encoded[pixel_ptr+6]; + 
encoded[pixel_ptr+7] = vq -= encoded[pixel_ptr+7]; + pixel_ptr += 8; + } + } + break; + case IMGTYPE_YUV411: + for (row = 0; row < height; row++) { + pixel_ptr = row * width / 2 * 3; + yq = uq = vq =0; + for (col = 0; col < width/4; col++) { + encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; + encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1]; + encoded[pixel_ptr+2] = yq -= encoded[pixel_ptr+2]; + encoded[pixel_ptr+3] = yq -= encoded[pixel_ptr+3]; + encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4]; + encoded[pixel_ptr+5] = vq -= encoded[pixel_ptr+5]; + pixel_ptr += 6; + } + } + break; + case IMGTYPE_YUV211: + for (row = 0; row < height; row++) { + pixel_ptr = row * width * 2; + yq = uq = vq =0; + for (col = 0; col < width/2; col++) { + encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; + encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1]; + encoded[pixel_ptr+2] = uq -= encoded[pixel_ptr+2]; + encoded[pixel_ptr+3] = vq -= encoded[pixel_ptr+3]; + pixel_ptr += 4; + } + } + break; + case IMGTYPE_YUV420: + for (row = 0; row < height/2; row++) { + pixel_ptr = row * width * 3; + yq = y1q = uq = vq =0; + for (col = 0; col < width/2; col++) { + encoded[pixel_ptr] = yq -= encoded[pixel_ptr]; + encoded[pixel_ptr+1] = yq -= encoded[pixel_ptr+1]; + encoded[pixel_ptr+2] = y1q -= encoded[pixel_ptr+2]; + encoded[pixel_ptr+3] = y1q -= encoded[pixel_ptr+3]; + encoded[pixel_ptr+4] = uq -= encoded[pixel_ptr+4]; + encoded[pixel_ptr+5] = vq -= encoded[pixel_ptr+5]; + pixel_ptr += 6; + } + } + break; + default: + av_log(avctx, AV_LOG_ERROR, "BUG! 
Unknown imagetype in pngfilter switch.\n"); + return -1; + } + } + + /* Convert colorspace */ + switch (c->imgtype) { + case IMGTYPE_YUV111: + for (row = height - 1; row >= 0; row--) { + pixel_ptr = row * c->pic.linesize[0]; + for (col = 0; col < width; col++) { + outptr[pixel_ptr++] = get_b(encoded[0], encoded[1]); + outptr[pixel_ptr++] = get_g(encoded[0], encoded[1], encoded[2]); + outptr[pixel_ptr++] = get_r(encoded[0], encoded[2]); + encoded += 3; + } + } + break; + case IMGTYPE_YUV422: + for (row = height - 1; row >= 0; row--) { + pixel_ptr = row * c->pic.linesize[0]; + for (col = 0; col < width/4; col++) { + outptr[pixel_ptr++] = get_b(encoded[0], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[0], encoded[4], encoded[6]); + outptr[pixel_ptr++] = get_r(encoded[0], encoded[6]); + outptr[pixel_ptr++] = get_b(encoded[1], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[1], encoded[4], encoded[6]); + outptr[pixel_ptr++] = get_r(encoded[1], encoded[6]); + outptr[pixel_ptr++] = get_b(encoded[2], encoded[5]); + outptr[pixel_ptr++] = get_g(encoded[2], encoded[5], encoded[7]); + outptr[pixel_ptr++] = get_r(encoded[2], encoded[7]); + outptr[pixel_ptr++] = get_b(encoded[3], encoded[5]); + outptr[pixel_ptr++] = get_g(encoded[3], encoded[5], encoded[7]); + outptr[pixel_ptr++] = get_r(encoded[3], encoded[7]); + encoded += 8; + } + } + break; + case IMGTYPE_RGB24: + for (row = height - 1; row >= 0; row--) { + pixel_ptr = row * c->pic.linesize[0]; + for (col = 0; col < width; col++) { + outptr[pixel_ptr++] = encoded[0]; + outptr[pixel_ptr++] = encoded[1]; + outptr[pixel_ptr++] = encoded[2]; + encoded += 3; + } + } + break; + case IMGTYPE_YUV411: + for (row = height - 1; row >= 0; row--) { + pixel_ptr = row * c->pic.linesize[0]; + for (col = 0; col < width/4; col++) { + outptr[pixel_ptr++] = get_b(encoded[0], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[0], encoded[4], encoded[5]); + outptr[pixel_ptr++] = get_r(encoded[0], encoded[5]); + outptr[pixel_ptr++] = 
get_b(encoded[1], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[1], encoded[4], encoded[5]); + outptr[pixel_ptr++] = get_r(encoded[1], encoded[5]); + outptr[pixel_ptr++] = get_b(encoded[2], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[2], encoded[4], encoded[5]); + outptr[pixel_ptr++] = get_r(encoded[2], encoded[5]); + outptr[pixel_ptr++] = get_b(encoded[3], encoded[4]); + outptr[pixel_ptr++] = get_g(encoded[3], encoded[4], encoded[5]); + outptr[pixel_ptr++] = get_r(encoded[3], encoded[5]); + encoded += 6; + } + } + break; + case IMGTYPE_YUV211: + for (row = height - 1; row >= 0; row--) { + pixel_ptr = row * c->pic.linesize[0]; + for (col = 0; col < width/2; col++) { + outptr[pixel_ptr++] = get_b(encoded[0], encoded[2]); + outptr[pixel_ptr++] = get_g(encoded[0], encoded[2], encoded[3]); + outptr[pixel_ptr++] = get_r(encoded[0], encoded[3]); + outptr[pixel_ptr++] = get_b(encoded[1], encoded[2]); + outptr[pixel_ptr++] = get_g(encoded[1], encoded[2], encoded[3]); + outptr[pixel_ptr++] = get_r(encoded[1], encoded[3]); + encoded += 4; + } + } + break; + case IMGTYPE_YUV420: + for (row = height / 2 - 1; row >= 0; row--) { + pixel_ptr = 2 * row * c->pic.linesize[0]; + for (col = 0; col < width/2; col++) { + outptr[pixel_ptr] = get_b(encoded[0], encoded[4]); + outptr[pixel_ptr+1] = get_g(encoded[0], encoded[4], encoded[5]); + outptr[pixel_ptr+2] = get_r(encoded[0], encoded[5]); + outptr[pixel_ptr+3] = get_b(encoded[1], encoded[4]); + outptr[pixel_ptr+4] = get_g(encoded[1], encoded[4], encoded[5]); + outptr[pixel_ptr+5] = get_r(encoded[1], encoded[5]); + outptr[pixel_ptr-c->pic.linesize[0]] = get_b(encoded[2], encoded[4]); + outptr[pixel_ptr-c->pic.linesize[0]+1] = get_g(encoded[2], encoded[4], encoded[5]); + outptr[pixel_ptr-c->pic.linesize[0]+2] = get_r(encoded[2], encoded[5]); + outptr[pixel_ptr-c->pic.linesize[0]+3] = get_b(encoded[3], encoded[4]); + outptr[pixel_ptr-c->pic.linesize[0]+4] = get_g(encoded[3], encoded[4], encoded[5]); + 
outptr[pixel_ptr-c->pic.linesize[0]+5] = get_r(encoded[3], encoded[5]); + pixel_ptr += 6; + encoded += 6; + } + } + break; + default: + av_log(avctx, AV_LOG_ERROR, "BUG! Unknown imagetype in image decoder.\n"); + return -1; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = c->pic; + + /* always report that the buffer was completely consumed */ + return buf_size; +} + + + +/* + * + * Encode a frame + * + */ +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + LclContext *c = avctx->priv_data; + AVFrame *pict = data; + AVFrame * const p = &c->pic; + int i; + int zret; // Zlib return code + +#ifndef CONFIG_ZLIB + av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled in.\n"); + return -1; +#else + + init_put_bits(&c->pb, buf, buf_size); + + *p = *pict; + p->pict_type= FF_I_TYPE; + p->key_frame= 1; + + if(avctx->pix_fmt != PIX_FMT_BGR24){ + av_log(avctx, AV_LOG_ERROR, "Format not supported!\n"); + return -1; + } + + zret = deflateReset(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Deflate reset error: %d\n", zret); + return -1; + } + c->zstream.next_out = c->comp_buf; + c->zstream.avail_out = c->max_comp_size; + + for(i = avctx->height - 1; i >= 0; i--) { + c->zstream.next_in = p->data[0]+p->linesize[0]*i; + c->zstream.avail_in = avctx->width*3; + zret = deflate(&(c->zstream), Z_NO_FLUSH); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); + return -1; + } + } + zret = deflate(&(c->zstream), Z_FINISH); + if (zret != Z_STREAM_END) { + av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); + return -1; + } + + for (i = 0; i < c->zstream.total_out; i++) + put_bits(&c->pb, 8, c->comp_buf[i]); + flush_put_bits(&c->pb); + + return c->zstream.total_out; +#endif +} + + + +/* + * + * Init lcl decoder + * + */ +static int decode_init(AVCodecContext *avctx) +{ + LclContext * const c = (LclContext *)avctx->priv_data; + unsigned int basesize = avctx->width * avctx->height; + 
unsigned int max_basesize = ((avctx->width + 3) & ~3) * ((avctx->height + 3) & ~3); + unsigned int max_decomp_size; + int zret; // Zlib return code + + c->avctx = avctx; + avctx->has_b_frames = 0; + + c->pic.data[0] = NULL; + +#ifdef CONFIG_ZLIB + // Needed if zlib unused or init aborted before inflateInit + memset(&(c->zstream), 0, sizeof(z_stream)); +#endif + + if (avctx->extradata_size < 8) { + av_log(avctx, AV_LOG_ERROR, "Extradata size too small.\n"); + return 1; + } + + if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { + return 1; + } + + /* Check codec type */ + if (((avctx->codec_id == CODEC_ID_MSZH) && (*((char *)avctx->extradata + 7) != CODEC_MSZH)) || + ((avctx->codec_id == CODEC_ID_ZLIB) && (*((char *)avctx->extradata + 7) != CODEC_ZLIB))) { + av_log(avctx, AV_LOG_ERROR, "Codec id and codec type mismatch. This should not happen.\n"); + } + + /* Detect image type */ + switch (c->imgtype = *((char *)avctx->extradata + 4)) { + case IMGTYPE_YUV111: + c->decomp_size = basesize * 3; + max_decomp_size = max_basesize * 3; + av_log(avctx, AV_LOG_INFO, "Image type is YUV 1:1:1.\n"); + break; + case IMGTYPE_YUV422: + c->decomp_size = basesize * 2; + max_decomp_size = max_basesize * 2; + av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:2.\n"); + break; + case IMGTYPE_RGB24: + c->decomp_size = basesize * 3; + max_decomp_size = max_basesize * 3; + av_log(avctx, AV_LOG_INFO, "Image type is RGB 24.\n"); + break; + case IMGTYPE_YUV411: + c->decomp_size = basesize / 2 * 3; + max_decomp_size = max_basesize / 2 * 3; + av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:1:1.\n"); + break; + case IMGTYPE_YUV211: + c->decomp_size = basesize * 2; + max_decomp_size = max_basesize * 2; + av_log(avctx, AV_LOG_INFO, "Image type is YUV 2:1:1.\n"); + break; + case IMGTYPE_YUV420: + c->decomp_size = basesize / 2 * 3; + max_decomp_size = max_basesize / 2 * 3; + av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:0.\n"); + break; + default: + av_log(avctx, 
AV_LOG_ERROR, "Unsupported image format %d.\n", c->imgtype); + return 1; + } + + /* Detect compression method */ + c->compression = *((char *)avctx->extradata + 5); + switch (avctx->codec_id) { + case CODEC_ID_MSZH: + switch (c->compression) { + case COMP_MSZH: + av_log(avctx, AV_LOG_INFO, "Compression enabled.\n"); + break; + case COMP_MSZH_NOCOMP: + c->decomp_size = 0; + av_log(avctx, AV_LOG_INFO, "No compression.\n"); + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unsupported compression format for MSZH (%d).\n", c->compression); + return 1; + } + break; + case CODEC_ID_ZLIB: +#ifdef CONFIG_ZLIB + switch (c->compression) { + case COMP_ZLIB_HISPEED: + av_log(avctx, AV_LOG_INFO, "High speed compression.\n"); + break; + case COMP_ZLIB_HICOMP: + av_log(avctx, AV_LOG_INFO, "High compression.\n"); + break; + case COMP_ZLIB_NORMAL: + av_log(avctx, AV_LOG_INFO, "Normal compression.\n"); + break; + default: + if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) { + av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression); + return 1; + } + av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression); + } +#else + av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n"); + return 1; +#endif + break; + default: + av_log(avctx, AV_LOG_ERROR, "BUG! 
Unknown codec in compression switch.\n"); + return 1; + } + + /* Allocate decompression buffer */ + if (c->decomp_size) { + if ((c->decomp_buf = av_malloc(max_decomp_size)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n"); + return 1; + } + } + + /* Detect flags */ + c->flags = *((char *)avctx->extradata + 6); + if (c->flags & FLAG_MULTITHREAD) + av_log(avctx, AV_LOG_INFO, "Multithread encoder flag set.\n"); + if (c->flags & FLAG_NULLFRAME) + av_log(avctx, AV_LOG_INFO, "Nullframe insertion flag set.\n"); + if ((avctx->codec_id == CODEC_ID_ZLIB) && (c->flags & FLAG_PNGFILTER)) + av_log(avctx, AV_LOG_INFO, "PNG filter flag set.\n"); + if (c->flags & FLAGMASK_UNUSED) + av_log(avctx, AV_LOG_ERROR, "Unknown flag set (%d).\n", c->flags); + + /* If needed init zlib */ + if (avctx->codec_id == CODEC_ID_ZLIB) { +#ifdef CONFIG_ZLIB + c->zstream.zalloc = Z_NULL; + c->zstream.zfree = Z_NULL; + c->zstream.opaque = Z_NULL; + zret = inflateInit(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret); + return 1; + } +#else + av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n"); + return 1; +#endif + } + + avctx->pix_fmt = PIX_FMT_BGR24; + + return 0; +} + + + +/* + * + * Init lcl encoder + * + */ +static int encode_init(AVCodecContext *avctx) +{ + LclContext *c = avctx->priv_data; + int zret; // Zlib return code + +#ifndef CONFIG_ZLIB + av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n"); + return 1; +#else + + c->avctx= avctx; + + assert(avctx->width && avctx->height); + + avctx->extradata= av_mallocz(8); + avctx->coded_frame= &c->pic; + + // Will be user settable someday + c->compression = 6; + c->flags = 0; + + switch(avctx->pix_fmt){ + case PIX_FMT_BGR24: + c->imgtype = IMGTYPE_RGB24; + c->decomp_size = avctx->width * avctx->height * 3; + avctx->bits_per_sample= 24; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Format %d not supported\n", avctx->pix_fmt); + return -1; + } + + 
((uint8_t*)avctx->extradata)[0]= 4; + ((uint8_t*)avctx->extradata)[1]= 0; + ((uint8_t*)avctx->extradata)[2]= 0; + ((uint8_t*)avctx->extradata)[3]= 0; + ((uint8_t*)avctx->extradata)[4]= c->imgtype; + ((uint8_t*)avctx->extradata)[5]= c->compression; + ((uint8_t*)avctx->extradata)[6]= c->flags; + ((uint8_t*)avctx->extradata)[7]= CODEC_ZLIB; + c->avctx->extradata_size= 8; + + c->zstream.zalloc = Z_NULL; + c->zstream.zfree = Z_NULL; + c->zstream.opaque = Z_NULL; + zret = deflateInit(&(c->zstream), c->compression); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Deflate init error: %d\n", zret); + return 1; + } + + /* Conservative upper bound taken from zlib v1.2.1 source */ + c->max_comp_size = c->decomp_size + ((c->decomp_size + 7) >> 3) + + ((c->decomp_size + 63) >> 6) + 11; + if ((c->comp_buf = av_malloc(c->max_comp_size)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n"); + return 1; + } + + return 0; +#endif +} + + + + + +/* + * + * Uninit lcl decoder + * + */ +static int decode_end(AVCodecContext *avctx) +{ + LclContext * const c = (LclContext *)avctx->priv_data; + + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); +#ifdef CONFIG_ZLIB + inflateEnd(&(c->zstream)); +#endif + + return 0; +} + + + +/* + * + * Uninit lcl encoder + * + */ +static int encode_end(AVCodecContext *avctx) +{ + LclContext *c = avctx->priv_data; + + av_freep(&avctx->extradata); + av_freep(&c->comp_buf); +#ifdef CONFIG_ZLIB + deflateEnd(&(c->zstream)); +#endif + + return 0; +} + +AVCodec mszh_decoder = { + "mszh", + CODEC_TYPE_VIDEO, + CODEC_ID_MSZH, + sizeof(LclContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + + +AVCodec zlib_decoder = { + "zlib", + CODEC_TYPE_VIDEO, + CODEC_ID_ZLIB, + sizeof(LclContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + +#ifdef CONFIG_ENCODERS + +AVCodec zlib_encoder = { + "zlib", + CODEC_TYPE_VIDEO, + CODEC_ID_ZLIB, + sizeof(LclContext), + 
encode_init, + encode_frame, + encode_end, +}; + +#endif //CONFIG_ENCODERS diff --git a/mpeg4/src/libavcodec/liba52/a52.h b/mpeg4/src/libavcodec/liba52/a52.h new file mode 100644 index 0000000000000000000000000000000000000000..f2ea5f836a76071d2ab6dbb0f538d9a16fbbb105 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/a52.h @@ -0,0 +1,73 @@ +/* + * a52.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef A52_H +#define A52_H + +#include "../avcodec.h" + +#undef malloc +#undef free +#undef realloc + +#if defined(LIBA52_FIXED) +typedef int32_t sample_t; +typedef int32_t level_t; +#elif defined(LIBA52_DOUBLE) +typedef double sample_t; +typedef double level_t; +#else +typedef float sample_t; +typedef float level_t; +#endif + +typedef struct a52_state_s a52_state_t; + +#define A52_CHANNEL 0 +#define A52_MONO 1 +#define A52_STEREO 2 +#define A52_3F 3 +#define A52_2F1R 4 +#define A52_3F1R 5 +#define A52_2F2R 6 +#define A52_3F2R 7 +#define A52_CHANNEL1 8 +#define A52_CHANNEL2 9 +#define A52_DOLBY 10 +#define A52_CHANNEL_MASK 15 + +#define A52_LFE 16 +#define A52_ADJUST_LEVEL 32 + +a52_state_t * a52_init (uint32_t mm_accel); +sample_t * a52_samples (a52_state_t * state); +int a52_syncinfo (uint8_t * buf, int * flags, + int * sample_rate, int * bit_rate); +int a52_frame (a52_state_t * state, uint8_t * buf, int * flags, + level_t * level, sample_t bias); +void a52_dynrng (a52_state_t * state, + level_t (* call) (level_t, void *), void * data); +int a52_block (a52_state_t * state); +void a52_free (a52_state_t * state); + +#endif /* A52_H */ diff --git a/mpeg4/src/libavcodec/liba52/a52_internal.h b/mpeg4/src/libavcodec/liba52/a52_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..49fd4ef9973873d238dac7b6bbddef15571f888a --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/a52_internal.h @@ -0,0 +1,162 @@ +/* + * a52_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. 
+ * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +typedef struct { + uint8_t bai; /* fine SNR offset, fast gain */ + uint8_t deltbae; /* delta bit allocation exists */ + int8_t deltba[50]; /* per-band delta bit allocation */ +} ba_t; + +typedef struct { + uint8_t exp[256]; /* decoded channel exponents */ + int8_t bap[256]; /* derived channel bit allocation */ +} expbap_t; + +struct a52_state_s { + uint8_t fscod; /* sample rate */ + uint8_t halfrate; /* halfrate factor */ + uint8_t acmod; /* coded channels */ + uint8_t lfeon; /* coded lfe channel */ + level_t clev; /* centre channel mix level */ + level_t slev; /* surround channels mix level */ + + int output; /* type of output */ + level_t level; /* output level */ + sample_t bias; /* output bias */ + + int dynrnge; /* apply dynamic range */ + level_t dynrng; /* dynamic range */ + void * dynrngdata; /* dynamic range callback function and data */ + level_t (* dynrngcall) (level_t range, void * dynrngdata); + + uint8_t chincpl; /* channel coupled */ + uint8_t phsflginu; /* phase flags in use (stereo only) */ + uint8_t cplstrtmant; /* coupling channel start mantissa */ + uint8_t cplendmant; /* coupling channel end mantissa */ + uint32_t cplbndstrc; /* coupling band structure */ + level_t cplco[5][18]; /* coupling coordinates */ + + /* derived information 
*/ + uint8_t cplstrtbnd; /* coupling start band (for bit allocation) */ + uint8_t ncplbnd; /* number of coupling bands */ + + uint8_t rematflg; /* stereo rematrixing */ + + uint8_t endmant[5]; /* channel end mantissa */ + + uint16_t bai; /* bit allocation information */ + + uint32_t * buffer_start; + uint16_t lfsr_state; /* dither state */ + uint32_t bits_left; + uint32_t current_word; + + uint8_t csnroffst; /* coarse SNR offset */ + ba_t cplba; /* coupling bit allocation parameters */ + ba_t ba[5]; /* channel bit allocation parameters */ + ba_t lfeba; /* lfe bit allocation parameters */ + + uint8_t cplfleak; /* coupling fast leak init */ + uint8_t cplsleak; /* coupling slow leak init */ + + expbap_t cpl_expbap; + expbap_t fbw_expbap[5]; + expbap_t lfe_expbap; + + sample_t * samples; + int downmixed; +}; + +#define LEVEL_PLUS6DB 2.0 +#define LEVEL_PLUS3DB 1.4142135623730951 +#define LEVEL_3DB 0.7071067811865476 +#define LEVEL_45DB 0.5946035575013605 +#define LEVEL_6DB 0.5 + +#define EXP_REUSE (0) +#define EXP_D15 (1) +#define EXP_D25 (2) +#define EXP_D45 (3) + +#define DELTA_BIT_REUSE (0) +#define DELTA_BIT_NEW (1) +#define DELTA_BIT_NONE (2) +#define DELTA_BIT_RESERVED (3) + +void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, + int start, int end, int fastleak, int slowleak, + expbap_t * expbap); + +int a52_downmix_init (int input, int flags, level_t * level, + level_t clev, level_t slev); +int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level, + level_t clev, level_t slev); +void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias, + level_t clev, level_t slev); +void a52_upmix (sample_t * samples, int acmod, int output); + +void a52_imdct_init (uint32_t mm_accel); +void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias); +void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias); +//extern void (* a52_imdct_256) (sample_t data[], sample_t delay[], sample_t bias); +//extern void 
(* a52_imdct_512) (sample_t data[], sample_t delay[], sample_t bias); + +#define ROUND(x) ((int)((x) + ((x) > 0 ? 0.5 : -0.5))) + +#ifndef LIBA52_FIXED + +typedef sample_t quantizer_t; +#define SAMPLE(x) (x) +#define LEVEL(x) (x) +#define MUL(a,b) ((a) * (b)) +#define MUL_L(a,b) ((a) * (b)) +#define MUL_C(a,b) ((a) * (b)) +#define DIV(a,b) ((a) / (b)) +#define BIAS(x) ((x) + bias) + +#else /* LIBA52_FIXED */ + +typedef int16_t quantizer_t; +#define SAMPLE(x) (sample_t)((x) * (1 << 30)) +#define LEVEL(x) (level_t)((x) * (1 << 26)) + +#if 0 +#define MUL(a,b) ((int)(((int64_t)(a) * (b) + (1 << 29)) >> 30)) +#define MUL_L(a,b) ((int)(((int64_t)(a) * (b) + (1 << 25)) >> 26)) +#elif 1 +#define MUL(a,b) \ +({ int32_t _ta=(a), _tb=(b), _tc; \ + _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)(((_tc >> 14))+ (((_ta >> 16)*(_tb >> 16)) << 2 )); }) +#define MUL_L(a,b) \ +({ int32_t _ta=(a), _tb=(b), _tc; \ + _tc=(_ta & 0xffff)*(_tb >> 16)+(_ta >> 16)*(_tb & 0xffff); (int32_t)((_tc >> 10) + (((_ta >> 16)*(_tb >> 16)) << 6)); }) +#else +#define MUL(a,b) (((a) >> 15) * ((b) >> 15)) +#define MUL_L(a,b) (((a) >> 13) * ((b) >> 13)) +#endif + +#define MUL_C(a,b) MUL_L (a, LEVEL (b)) +#define DIV(a,b) ((((int64_t)LEVEL (a)) << 26) / (b)) +#define BIAS(x) (x) + +#endif diff --git a/mpeg4/src/libavcodec/liba52/a52_util.h b/mpeg4/src/libavcodec/liba52/a52_util.h new file mode 100644 index 0000000000000000000000000000000000000000..8ef2cece9d42cbab2cd614dbef2e37f66ed37989 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/a52_util.h @@ -0,0 +1,32 @@ +/* + * a52_util.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. 
+ * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef A52_UTIL_H +#define A52_UTIL_H + +uint16_t a52_crc16_block(uint8_t *data,uint32_t num_bytes); + +void* a52_resample_init(uint32_t mm_accel,int flags,int chans); +extern int (* a52_resample) (float * _f, int16_t * s16); + +#endif /* A52_UTIL_H */ diff --git a/mpeg4/src/libavcodec/liba52/bit_allocate.c b/mpeg4/src/libavcodec/liba52/bit_allocate.c new file mode 100644 index 0000000000000000000000000000000000000000..415a08d219d5266547f5b2174c5f2de4b8211c04 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/bit_allocate.c @@ -0,0 +1,260 @@ +/* + * bit_allocate.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "a52.h" +#include "a52_internal.h" + +static int hthtab[3][50] = { + {0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860, + 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890, + 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, + 0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0, + 0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0}, + {0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860, + 0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, + 0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0, + 0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820, + 0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0}, + {0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850, + 0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, + 0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0, + 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0, + 0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720} +}; + +static int8_t baptab[305] = { + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 93 padding elems */ + + 16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14, + 14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 
10, 10, 10, 9, 9, 9, + 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, + 5, 4, 4, -3, -3, 3, 3, 3, -2, -2, -1, -1, -1, -1, -1, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 /* 148 padding elems */ +}; + +static int bndtab[30] = {21, 22, 23, 24, 25, 26, 27, 28, 31, 34, + 37, 40, 43, 46, 49, 55, 61, 67, 73, 79, + 85, 97, 109, 121, 133, 157, 181, 205, 229, 253}; + +static int8_t latab[256] = { + -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, + -52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44, + -43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35, + -35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28, + -28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22, + -22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18, + -17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14, + -13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11, + -10, -10, -10, -10, -10, -9, -9, -9, -9, -9, -8, -8, + -8, -8, -8, -8, -7, -7, -7, -7, -7, -7, -6, -6, + -6, -6, -6, -6, -6, -6, -5, -5, -5, -5, -5, -5, + -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; + +#define UPDATE_LEAK() \ +do { \ + fastleak += fdecay; 
\ + if (fastleak > psd + fgain) \ + fastleak = psd + fgain; \ + slowleak += sdecay; \ + if (slowleak > psd + sgain) \ + slowleak = psd + sgain; \ +} while (0) + +#define COMPUTE_MASK() \ +do { \ + if (psd > dbknee) \ + mask -= (psd - dbknee) >> 2; \ + if (mask > hth [i >> halfrate]) \ + mask = hth [i >> halfrate]; \ + mask -= snroffset + 128 * deltba[i]; \ + mask = (mask > 0) ? 0 : ((-mask) >> 5); \ + mask -= floor; \ +} while (0) + +void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart, + int start, int end, int fastleak, int slowleak, + expbap_t * expbap) +{ + static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410}; + static int dbpbtab[4] = {0xc00, 0x500, 0x300, 0x100}; + static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0, + 0xa10, 0xa90, 0xb10, 0x1400}; + + int i, j; + uint8_t * exp; + int8_t * bap; + int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset; + int psd, mask; + int8_t * deltba; + int * hth; + int halfrate; + + halfrate = state->halfrate; + fdecay = (63 + 20 * ((state->bai >> 7) & 3)) >> halfrate; /* fdcycod */ + fgain = 128 + 128 * (ba->bai & 7); /* fgaincod */ + sdecay = (15 + 2 * (state->bai >> 9)) >> halfrate; /* sdcycod */ + sgain = slowgain[(state->bai >> 5) & 3]; /* sgaincod */ + dbknee = dbpbtab[(state->bai >> 3) & 3]; /* dbpbcod */ + hth = hthtab[state->fscod]; + /* + * if there is no delta bit allocation, make deltba point to an area + * known to contain zeroes. baptab+156 here. + */ + deltba = (ba->deltbae == DELTA_BIT_NONE) ? 
baptab + 156 : ba->deltba; + floor = floortab[state->bai & 7]; /* floorcod */ + snroffset = 960 - 64 * state->csnroffst - 4 * (ba->bai >> 3) + floor; + floor >>= 5; + + exp = expbap->exp; + bap = expbap->bap; + + i = bndstart; + j = start; + if (start == 0) { /* not the coupling channel */ + int lowcomp; + + lowcomp = 0; + j = end - 1; + do { + if (i < j) { + if (exp[i+1] == exp[i] - 2) + lowcomp = 384; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + } + psd = 128 * exp[i]; + mask = psd + fgain + lowcomp; + COMPUTE_MASK (); + bap[i] = (baptab+156)[mask + 4 * exp[i]]; + i++; + } while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1]))); + fastleak = psd + fgain; + slowleak = psd + sgain; + + while (i < 7) { + if (i < j) { + if (exp[i+1] == exp[i] - 2) + lowcomp = 384; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + } + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i] = (baptab+156)[mask + 4 * exp[i]]; + i++; + } + + if (end == 7) /* lfe channel */ + return; + + do { + if (exp[i+1] == exp[i] - 2) + lowcomp = 320; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i] = (baptab+156)[mask + 4 * exp[i]]; + i++; + } while (i < 20); + + while (lowcomp > 128) { /* two iterations maximum */ + lowcomp -= 128; + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i] = (baptab+156)[mask + 4 * exp[i]]; + i++; + } + j = i; + } + + do { + int startband, endband; + + startband = j; + endband = (bndtab[i-20] < end) ? 
bndtab[i-20] : end; + psd = 128 * exp[j++]; + while (j < endband) { + int next, delta; + + next = 128 * exp[j++]; + delta = next - psd; + switch (delta >> 9) { + case -6: case -5: case -4: case -3: case -2: + psd = next; + break; + case -1: + psd = next + latab[(-delta) >> 1]; + break; + case 0: + psd += latab[delta >> 1]; + break; + } + } + /* minpsd = -289 */ + UPDATE_LEAK (); + mask = (fastleak < slowleak) ? fastleak : slowleak; + COMPUTE_MASK (); + i++; + j = startband; + do { + /* max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp */ + /* min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 */ + bap[j] = (baptab+156)[mask + 4 * exp[j]]; + } while (++j < endband); + } while (j < end); +} diff --git a/mpeg4/src/libavcodec/liba52/bitstream.c b/mpeg4/src/libavcodec/liba52/bitstream.c new file mode 100644 index 0000000000000000000000000000000000000000..f6b05c5e695a0eacf56f083b66e46037687c6227 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/bitstream.c @@ -0,0 +1,91 @@ +/* + * bitstream.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "a52.h" +#include "a52_internal.h" +#include "bitstream.h" + +#define BUFFER_SIZE 4096 + +void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf) +{ + int align; + + align = (long)buf & 3; + state->buffer_start = (uint32_t *) (buf - align); + state->bits_left = 0; + state->current_word = 0; + bitstream_get (state, align * 8); +} + +static inline void bitstream_fill_current (a52_state_t * state) +{ + uint32_t tmp; + + tmp = *(state->buffer_start++); + state->current_word = swab32 (tmp); +} + +/* + * The fast paths for _get is in the + * bitstream.h header file so it can be inlined. + * + * The "bottom half" of this routine is suffixed _bh + * + * -ah + */ + +uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits) +{ + uint32_t result; + + num_bits -= state->bits_left; + result = ((state->current_word << (32 - state->bits_left)) >> + (32 - state->bits_left)); + + bitstream_fill_current (state); + + if (num_bits != 0) + result = (result << num_bits) | (state->current_word >> (32 - num_bits)); + + state->bits_left = 32 - num_bits; + + return result; +} + +int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits) +{ + int32_t result; + + num_bits -= state->bits_left; + result = ((((int32_t)state->current_word) << (32 - state->bits_left)) >> + (32 - state->bits_left)); + + bitstream_fill_current(state); + + if (num_bits != 0) + result = (result << num_bits) | (state->current_word >> (32 - num_bits)); + + state->bits_left = 32 - num_bits; + + return result; +} diff --git a/mpeg4/src/libavcodec/liba52/bitstream.h b/mpeg4/src/libavcodec/liba52/bitstream.h new file mode 100644 index 0000000000000000000000000000000000000000..4a64bf3d9611d94f5c018b23610f3025c8c22ada --- /dev/null +++ 
b/mpeg4/src/libavcodec/liba52/bitstream.h @@ -0,0 +1,77 @@ +/* + * bitstream.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* (stolen from the kernel) */ +#ifdef WORDS_BIGENDIAN + +# define swab32(x) (x) + +#else + +# if 0 && defined (__i386__) + +# define swab32(x) __i386_swab32(x) + static inline const uint32_t __i386_swab32(uint32_t x) + { + __asm__("bswap %0" : "=r" (x) : "0" (x)); + return x; + } + +# else + +# define swab32(x)\ +((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ + (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) + +# endif +#endif + +void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf); +uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits); +int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits); + +static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits) +{ + uint32_t result; + + if (num_bits < state->bits_left) { + result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits); + state->bits_left -= num_bits; + return result; + } + + return a52_bitstream_get_bh (state, 
num_bits); +} + +static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits) +{ + int32_t result; + + if (num_bits < state->bits_left) { + result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits); + state->bits_left -= num_bits; + return result; + } + + return a52_bitstream_get_bh_2 (state, num_bits); +} diff --git a/mpeg4/src/libavcodec/liba52/crc.c b/mpeg4/src/libavcodec/liba52/crc.c new file mode 100644 index 0000000000000000000000000000000000000000..1ec4b085f3d33554d5708572b3354412000a9c6f --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/crc.c @@ -0,0 +1,73 @@ +/* + * crc.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include +#include +#include + +static const uint16_t crc_lut[256] = +{ + 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, + 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, + 0x8063,0x0066,0x006c,0x8069,0x0078,0x807d,0x8077,0x0072, + 0x0050,0x8055,0x805f,0x005a,0x804b,0x004e,0x0044,0x8041, + 0x80c3,0x00c6,0x00cc,0x80c9,0x00d8,0x80dd,0x80d7,0x00d2, + 0x00f0,0x80f5,0x80ff,0x00fa,0x80eb,0x00ee,0x00e4,0x80e1, + 0x00a0,0x80a5,0x80af,0x00aa,0x80bb,0x00be,0x00b4,0x80b1, + 0x8093,0x0096,0x009c,0x8099,0x0088,0x808d,0x8087,0x0082, + 0x8183,0x0186,0x018c,0x8189,0x0198,0x819d,0x8197,0x0192, + 0x01b0,0x81b5,0x81bf,0x01ba,0x81ab,0x01ae,0x01a4,0x81a1, + 0x01e0,0x81e5,0x81ef,0x01ea,0x81fb,0x01fe,0x01f4,0x81f1, + 0x81d3,0x01d6,0x01dc,0x81d9,0x01c8,0x81cd,0x81c7,0x01c2, + 0x0140,0x8145,0x814f,0x014a,0x815b,0x015e,0x0154,0x8151, + 0x8173,0x0176,0x017c,0x8179,0x0168,0x816d,0x8167,0x0162, + 0x8123,0x0126,0x012c,0x8129,0x0138,0x813d,0x8137,0x0132, + 0x0110,0x8115,0x811f,0x011a,0x810b,0x010e,0x0104,0x8101, + 0x8303,0x0306,0x030c,0x8309,0x0318,0x831d,0x8317,0x0312, + 0x0330,0x8335,0x833f,0x033a,0x832b,0x032e,0x0324,0x8321, + 0x0360,0x8365,0x836f,0x036a,0x837b,0x037e,0x0374,0x8371, + 0x8353,0x0356,0x035c,0x8359,0x0348,0x834d,0x8347,0x0342, + 0x03c0,0x83c5,0x83cf,0x03ca,0x83db,0x03de,0x03d4,0x83d1, + 0x83f3,0x03f6,0x03fc,0x83f9,0x03e8,0x83ed,0x83e7,0x03e2, + 0x83a3,0x03a6,0x03ac,0x83a9,0x03b8,0x83bd,0x83b7,0x03b2, + 0x0390,0x8395,0x839f,0x039a,0x838b,0x038e,0x0384,0x8381, + 0x0280,0x8285,0x828f,0x028a,0x829b,0x029e,0x0294,0x8291, + 0x82b3,0x02b6,0x02bc,0x82b9,0x02a8,0x82ad,0x82a7,0x02a2, + 0x82e3,0x02e6,0x02ec,0x82e9,0x02f8,0x82fd,0x82f7,0x02f2, + 0x02d0,0x82d5,0x82df,0x02da,0x82cb,0x02ce,0x02c4,0x82c1, + 0x8243,0x0246,0x024c,0x8249,0x0258,0x825d,0x8257,0x0252, + 0x0270,0x8275,0x827f,0x027a,0x826b,0x026e,0x0264,0x8261, + 0x0220,0x8225,0x822f,0x022a,0x823b,0x023e,0x0234,0x8231, + 0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202 +}; + +uint16_t 
a52_crc16_block(uint8_t *data,uint32_t num_bytes) +{ + uint32_t i; + uint16_t state=0; + + for(i=0;i>8)] ^ (state<<8); + + return state; +} diff --git a/mpeg4/src/libavcodec/liba52/downmix.c b/mpeg4/src/libavcodec/liba52/downmix.c new file mode 100644 index 0000000000000000000000000000000000000000..7999b7db0910f17f6bf32bc943c00fe0582d8153 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/downmix.c @@ -0,0 +1,679 @@ +/* + * downmix.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "a52.h"
+#include "a52_internal.h"
+/* Switch key for an (input acmod, output mode) pair: output sits in bits 3 and up, acmod in the low 3 bits. */
+#define CONVERT(acmod,output) (((output) << 3) + (acmod))
+/* Choose the output mode for stream mode `input` from the request in `flags`; returns it, or -1 when the request is above A52_DOLBY.  With A52_ADJUST_LEVEL set, *level is scaled by a factor that depends on the (input, output) pair; pairs without a case leave *level alone. */
+int a52_downmix_init (int input, int flags, level_t * level,
+                      level_t clev, level_t slev)
+{
+    static uint8_t table[11][8] = { /* [requested output][input & 7] -> output mode used */
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
+         A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
+        {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
+         A52_MONO, A52_MONO, A52_MONO, A52_MONO},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
+         A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
+         A52_STEREO, A52_3F, A52_STEREO, A52_3F},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
+         A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
+         A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
+         A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
+         A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
+        {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
+         A52_MONO, A52_MONO, A52_MONO, A52_MONO},
+        {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
+         A52_MONO, A52_MONO, A52_MONO, A52_MONO},
+        {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
+         A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
+    };
+    int output;
+
+    output = flags & A52_CHANNEL_MASK;
+    if (output > A52_DOLBY)
+        return -1;
+
+    output = table[output][input & 7];
+    /* a stereo result is promoted to A52_DOLBY for Dolby input, or for 3F input whose clev equals LEVEL (LEVEL_3DB) */
+    if (output == A52_STEREO &&
+        (input == A52_DOLBY || (input == A52_3F && clev == LEVEL (LEVEL_3DB))))
+        output = A52_DOLBY;
+
+    if (flags & A52_ADJUST_LEVEL) {
+        level_t adjust;
+
+        switch (CONVERT (input & 7, output)) {
+
+        case CONVERT (A52_3F, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev);
+            break;
+
+        case CONVERT (A52_STEREO, A52_MONO):
+        case CONVERT (A52_2F2R, A52_2F1R):
+        case CONVERT (A52_3F2R, A52_3F1R):
+        level_3db:
+            adjust = LEVEL (LEVEL_3DB);
+            break;
+
+        case CONVERT (A52_3F2R, A52_2F1R):
+            if (clev < LEVEL (LEVEL_PLUS3DB - 1))
+                goto level_3db;
+            /* fall through */
+        case CONVERT (A52_3F, A52_STEREO):
+        case CONVERT (A52_3F1R, A52_2F1R):
+        case CONVERT (A52_3F1R, A52_2F2R):
+        case CONVERT (A52_3F2R, A52_2F2R):
+            adjust = DIV (1, LEVEL (1) + clev);
+            break;
+
+        case CONVERT (A52_2F1R, A52_MONO):
+            adjust = DIV (LEVEL_PLUS3DB, LEVEL (2) + slev);
+            break;
+
+        case CONVERT (A52_2F1R, A52_STEREO):
+        case CONVERT (A52_3F1R, A52_3F):
+            adjust = DIV (1, LEVEL (1) + MUL_C (slev, LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F1R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + MUL_C (slev, 0.5));
+            break;
+
+        case CONVERT (A52_3F1R, A52_STEREO):
+            adjust = DIV (1, LEVEL (1) + clev + MUL_C (slev, LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_2F2R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + slev);
+            break;
+
+        case CONVERT (A52_2F2R, A52_STEREO):
+        case CONVERT (A52_3F2R, A52_3F):
+            adjust = DIV (1, LEVEL (1) + slev);
+            break;
+
+        case CONVERT (A52_3F2R, A52_MONO):
+            adjust = DIV (LEVEL_3DB, LEVEL (1) + clev + slev);
+            break;
+
+        case CONVERT (A52_3F2R, A52_STEREO):
+            adjust = DIV (1, LEVEL (1) + clev + slev);
+            break;
+
+        case CONVERT (A52_MONO, A52_DOLBY):
+            adjust = LEVEL (LEVEL_PLUS3DB);
+            break;
+
+        case CONVERT (A52_3F, A52_DOLBY):
+        case CONVERT (A52_2F1R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F1R, A52_DOLBY):
+        case CONVERT (A52_2F2R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + 2 * LEVEL_3DB));
+            break;
+
+        case CONVERT (A52_3F2R, A52_DOLBY):
+            adjust = LEVEL (1 / (1 + 3 * LEVEL_3DB));
+            break;
+
+        default:
+            return output; /* no adjustment defined for this pair */
+        }
+
+        *level = MUL_L (*level, adjust);
+    }
+
+    return output;
+}
+/* Write the mixing gains for the (acmod, output) pair into coeff[] (each case sets only some entries) and return an integer code, or -1 for a pair without a case.  NOTE(review): the codes look like a bit mask over input channels - confirm against the caller. */
+int a52_downmix_coeff (level_t * coeff, int acmod, int output, level_t level,
+                       level_t clev, level_t slev)
+{
+    level_t level_3db;
+
+    level_3db = MUL_C (level, LEVEL_3DB);
+
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL):
+    case CONVERT (A52_MONO, A52_MONO):
+    case CONVERT (A52_STEREO, A52_STEREO):
+    case CONVERT (A52_3F, A52_3F):
+    case CONVERT (A52_2F1R, A52_2F1R):
+    case CONVERT (A52_3F1R, A52_3F1R):
+    case CONVERT (A52_2F2R, A52_2F2R):
+    case CONVERT (A52_3F2R, A52_3F2R):
+    case CONVERT (A52_STEREO, A52_DOLBY):
+        coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+        coeff[0] = coeff[1] = MUL_C (level, LEVEL_6DB);
+        return 3;
+
+    case CONVERT (A52_STEREO, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        return 3;
+
+    case CONVERT (A52_3F, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        coeff[2] = MUL_L (level_3db, slev);
+        return 7;
+
+    case CONVERT (A52_2F2R, A52_MONO):
+        coeff[0] = coeff[1] = level_3db;
+        coeff[2] = coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        coeff[0] = coeff[2] = level_3db;
+        coeff[1] = MUL_C (MUL_L (level_3db, clev), LEVEL_PLUS6DB);
+        coeff[3] = coeff[4] = MUL_L (level_3db, slev);
+        return 31;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+        coeff[0] = level_3db;
+        return 0;
+
+    case CONVERT (A52_3F, A52_DOLBY):
+        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+        coeff[1] = level_3db;
+        return 7;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_2F1R):
+    case CONVERT (A52_3F2R, A52_2F2R):
+        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
+        coeff[1] = MUL_L (level, clev);
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = level_3db;
+        return 7;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = MUL_L (level_3db, slev);
+        return 7;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = coeff[3] = level_3db;
+        return 15;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = MUL_L (level_3db, slev);
+        return 15;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = level_3db;
+        return 15;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = MUL_L (level, slev);
+        return 15;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = coeff[3] = coeff[4] = level_3db;
+        return 31;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = coeff[4] = level_3db;
+        return 31;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = coeff[4] = MUL_L (level, slev);
+        return 31;
+
+    case CONVERT (A52_3F1R, A52_3F):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = MUL_L (level_3db, slev);
+        return 13;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = coeff[4] = MUL_L (level, slev);
+        return 29;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = coeff[3] = level_3db;
+        return 12;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = coeff[4] = level_3db;
+        return 24;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+        coeff[0] = coeff[1] = level;
+        coeff[2] = level_3db;
+        return 0;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+        coeff[0] = coeff[2] = level;
+        coeff[1] = MUL_L (level, clev);
+        coeff[3] = level_3db;
+        return 7;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+        coeff[0] = coeff[1] = coeff[2] = level;
+        coeff[3] = level_3db;
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
+        coeff[0] = level;
+        coeff[1] = 0;
+        return 0;
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        coeff[0] = 0;
+        coeff[1] = level;
+        return 0;
+    }
+
+    return -1; /* NOTREACHED */
+}
+/* The mix helpers below all work on 256-sample blocks; `bias` is not named in their bodies, presumably the BIAS macro picks it up - confirm in a52_internal.h.  mix2to1: dest[i] += BIAS (src[i]). */
+static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        dest[i] += BIAS (src[i]);
+}
+/* Add the blocks at offsets 256 and 512 into the block at offset 0. */
+static void mix3to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        samples[i] += BIAS (samples[i + 256] + samples[i + 512]);
+}
+/* Add the blocks at offsets 256, 512 and 768 into the block at offset 0. */
+static void mix4to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        samples[i] += BIAS (samples[i + 256] + samples[i + 512] +
+                            samples[i + 768]);
+}
+/* Add the blocks at offsets 256, 512, 768 and 1024 into the block at offset 0. */
+static void mix5to1 (sample_t * samples, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        samples[i] += BIAS (samples[i + 256] + samples[i + 512] +
+                            samples[i + 768] + samples[i + 1024]);
+}
+/* Block 256 is added to block 0; block 256 then becomes block 512 plus the same term. */
+static void mix3to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (samples[i + 256]);
+        samples[i] += common;
+        samples[i + 256] = samples[i + 512] + common;
+    }
+}
+/* Add the block 256 samples past `right` to both `left` and `right`. */
+static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (right[i + 256]);
+        left[i] += common;
+        right[i] += common;
+    }
+}
+/* Block 512 is subtracted from block 0 and added to block 256. */
+static void mix21toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t surround;
+
+    for (i = 0; i < 256; i++) {
+        surround = samples[i + 512];
+        samples[i] += BIAS (-surround);
+        samples[i + 256] += BIAS (surround);
+    }
+}
+/* Like mix3to2, but the shared term is block 256 plus block 768. */
+static void mix31to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (samples[i + 256] + samples[i + 768]);
+        samples[i] += common;
+        samples[i + 256] = samples[i + 512] + common;
+    }
+}
+/* Like mix3to2, with block 768 additionally subtracted from block 0 and added to the new block 256. */
+static void mix31toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common, surround;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (samples[i + 256]);
+        surround = samples[i + 768];
+        samples[i] += common - surround;
+        samples[i + 256] = samples[i + 512] + common + surround;
+    }
+}
+/* The sum of blocks 512 and 768 is subtracted from block 0 and added to block 256. */
+static void mix22toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t surround;
+
+    for (i = 0; i < 256; i++) {
+        surround = samples[i + 512] + samples[i + 768];
+        samples[i] += BIAS (-surround);
+        samples[i + 256] += BIAS (surround);
+    }
+}
+/* Block 0 gains blocks 256 and 768; block 256 becomes the old block 256 plus blocks 512 and 1024. */
+static void mix32to2 (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (samples[i + 256]);
+        samples[i] += common + samples[i + 768];
+        samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
+    }
+}
+/* Like mix31toS, with the sum of blocks 768 and 1024 as the subtracted/added term. */
+static void mix32toS (sample_t * samples, sample_t bias)
+{
+    int i;
+    sample_t common, surround;
+
+    for (i = 0; i < 256; i++) {
+        common = BIAS (samples[i + 256]);
+        surround = samples[i + 768] + samples[i + 1024];
+        samples[i] += common - surround;
+        samples[i + 256] = samples[i + 512] + common + surround;
+    }
+}
+/* dest[i] = BIAS (src[i] + src[i + 256]); note that src comes first in the parameter list. */
+static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        dest[i] = BIAS (src[i] + src[i + 256]);
+}
+/* Clear one 256-sample block. */
+static void zero (sample_t * samples)
+{
+    int i;
+
+    for (i = 0; i < 256; i++)
+        samples[i] = 0;
+}
+/* Mix the 256-sample blocks of `samples` (offsets 0, 256, ... 1024) in place for the (acmod, output) pair; pairs without a case are left untouched.  clev is not read here. */
+void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
+                  level_t clev, level_t slev)
+{
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        memcpy (samples, samples + 256, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+    mix_2to1:
+        mix2to1 (samples, samples + 256, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_MONO):
+        if (slev == 0)
+            goto mix_2to1; /* otherwise fall through to the 3-to-1 mix */
+    case CONVERT (A52_3F, A52_MONO):
+    mix_3to1:
+        mix3to1 (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_MONO):
+        if (slev == 0)
+            goto mix_3to1; /* otherwise fall through */
+    case CONVERT (A52_2F2R, A52_MONO):
+        if (slev == 0)
+            goto mix_2to1;
+        mix4to1 (samples, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        if (slev == 0)
+            goto mix_3to1;
+        mix5to1 (samples, bias);
+        break;
+
+    case CONVERT (A52_MONO, A52_DOLBY):
+        memcpy (samples + 256, samples, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:
+        mix3to2 (samples, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix21to2 (samples, samples + 256, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        mix21toS (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2;
+        mix31to2 (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        mix31toS (samples, bias);
+        break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+        if (slev == 0)
+            break;
+        mix2to1 (samples, samples + 512, bias);
+        mix2to1 (samples + 256, samples + 768, bias);
+        break;
+
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        mix22toS (samples, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+        if (slev == 0)
+            goto mix_3to2;
+        mix32to2 (samples, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        mix32toS (samples, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F):
+        if (slev == 0)
+            break;
+        mix21to2 (samples, samples + 512, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        if (slev == 0)
+            break;
+        mix2to1 (samples, samples + 768, bias);
+        mix2to1 (samples + 512, samples + 1024, bias);
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F1R):
+        mix3to2 (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_2F2R, A52_2F1R):
+        mix2to1 (samples + 512, samples + 768, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        mix3to2 (samples, bias);
+        move2to1 (samples + 768, samples + 512, bias);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        mix2to1 (samples + 768, samples + 1024, bias);
+        break;
+
+    case CONVERT (A52_2F1R, A52_2F2R):
+        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F1R, A52_2F2R):
+        mix3to2 (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+        mix3to2 (samples, bias);
+        memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
+        memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F1R, A52_3F2R):
+        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+        break;
+    }
+}
+/* Copy or clear 256-sample blocks of `samples` for the (acmod, output) pair; many cases fall through on purpose so that one entry point clears several blocks.  Pairs without a case are left untouched. */
+void a52_upmix (sample_t * samples, int acmod, int output)
+{
+    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
+
+    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
+        memcpy (samples + 256, samples, 256 * sizeof (sample_t));
+        break;
+
+    case CONVERT (A52_3F2R, A52_MONO):
+        zero (samples + 1024); /* fall through */
+    case CONVERT (A52_3F1R, A52_MONO):
+    case CONVERT (A52_2F2R, A52_MONO):
+        zero (samples + 768); /* fall through */
+    case CONVERT (A52_3F, A52_MONO):
+    case CONVERT (A52_2F1R, A52_MONO):
+        zero (samples + 512); /* fall through */
+    case CONVERT (A52_CHANNEL, A52_MONO):
+    case CONVERT (A52_STEREO, A52_MONO):
+        zero (samples + 256);
+        break;
+
+    case CONVERT (A52_3F2R, A52_STEREO):
+    case CONVERT (A52_3F2R, A52_DOLBY):
+        zero (samples + 1024); /* fall through */
+    case CONVERT (A52_3F1R, A52_STEREO):
+    case CONVERT (A52_3F1R, A52_DOLBY):
+        zero (samples + 768); /* fall through */
+    case CONVERT (A52_3F, A52_STEREO):
+    case CONVERT (A52_3F, A52_DOLBY):
+    mix_3to2:
+        memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
+        zero (samples + 256);
+        break;
+
+    case CONVERT (A52_2F2R, A52_STEREO):
+    case CONVERT (A52_2F2R, A52_DOLBY):
+        zero (samples + 768); /* fall through */
+    case CONVERT (A52_2F1R, A52_STEREO):
+    case CONVERT (A52_2F1R, A52_DOLBY):
+        zero (samples + 512);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F):
+        zero (samples + 1024); /* fall through */
+    case CONVERT (A52_3F1R, A52_3F):
+    case CONVERT (A52_2F2R, A52_2F1R):
+        zero (samples + 768);
+        break;
+
+    case CONVERT (A52_3F2R, A52_3F1R):
+        zero (samples + 1024);
+        break;
+
+    case CONVERT (A52_3F2R, A52_2F1R):
+        zero (samples + 1024); /* fall through */
+    case CONVERT (A52_3F1R, A52_2F1R):
+    mix_31to21:
+        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
+        goto mix_3to2;
+
+    case CONVERT (A52_3F2R, A52_2F2R):
+        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
+        goto mix_31to21;
+    }
+}
diff --git a/mpeg4/src/libavcodec/liba52/imdct.c b/mpeg4/src/libavcodec/liba52/imdct.c
new file mode 100644
index 0000000000000000000000000000000000000000..21a2a65656d3a1e8dfcae85fe508d49180cce728
--- /dev/null
+++ b/mpeg4/src/libavcodec/liba52/imdct.c
@@ -0,0 +1,411 @@
+/*
+ * imdct.c
+ * Copyright (C) 2000-2003 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * The ifft algorithms in this file have been largely inspired by Dan
+ * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "a52.h"
+#include "a52_internal.h"
+#include "mm_accel.h"
+/* One complex value; the ifft routines below work in place on arrays of these. */
+typedef struct complex_s {
+    sample_t real;
+    sample_t imag;
+} complex_t;
+/* Index permutation: a52_imdct_512/256 build buf[i] from data[fftorder[i]], and a52_imdct_init derives the pre-twiddle angles from it. */
+static uint8_t fftorder[] = {
+      0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
+      8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
+      4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
+    252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
+      2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
+     10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
+    254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
+      6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
+};
+
+/* Root values for IFFT: rootsN[i] = cos (2 * pi * (i + 1) / N), filled in by a52_imdct_init */
+static sample_t roots16[3];
+static sample_t roots32[7];
+static sample_t roots64[15];
+static sample_t roots128[31];
+
+/* Twiddle factors for IMDCT: pre1/post1 serve a52_imdct_512, pre2/post2 serve a52_imdct_256 */
+static complex_t pre1[128];
+static complex_t post1[64];
+static complex_t pre2[64];
+static complex_t post2[32];
+/* Window applied in the output stage of both transforms; computed in a52_imdct_init. */
+static sample_t a52_imdct_window[256];
+/* Set by a52_imdct_init: the C routines below, or the djbfft ones when LIBA52_DJBFFT is defined and MM_ACCEL_DJBFFT is requested. */
+static void (* ifft128) (complex_t * buf);
+static void (* ifft64) (complex_t * buf);
+/* 2-point butterfly in place: buf[0] becomes the sum, buf[1] the difference. */
+static inline void ifft2 (complex_t * buf)
+{
+    sample_t r, i;
+
+    r = buf[0].real;
+    i = buf[0].imag;
+    buf[0].real += buf[1].real;
+    buf[0].imag += buf[1].imag;
+    buf[1].real = r - buf[1].real;
+    buf[1].imag = i - buf[1].imag;
+}
+/* 4-point transform in place, written out without multiplications. */
+static inline void ifft4 (complex_t * buf)
+{
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    tmp1 = buf[0].real + buf[1].real;
+    tmp2 = buf[3].real + buf[2].real;
+    tmp3 = buf[0].imag + buf[1].imag;
+    tmp4 = buf[2].imag + buf[3].imag;
+    tmp5 = buf[0].real - buf[1].real;
+    tmp6 = buf[0].imag - buf[1].imag;
+    tmp7 = buf[2].imag - buf[3].imag;
+    tmp8 = buf[3].real - buf[2].real;
+
+    buf[0].real = tmp1 + tmp2;
+    buf[0].imag = tmp3 + tmp4;
+    buf[2].real = tmp1 - tmp2;
+    buf[2].imag = tmp3 - tmp4;
+    buf[1].real = tmp5 + tmp7;
+    buf[1].imag = tmp6 + tmp8;
+    buf[3].real = tmp5 - tmp7;
+    buf[3].imag = tmp6 - tmp8;
+}
+
+/* basic radix-2 ifft butterfly: t0 = W1*d1 + W0*d0, t1 = W0*d1 - W1*d0 */
+
+#define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \
+    t0 = MUL (W1, d1) + MUL (W0, d0); \
+    t1 = MUL (W0, d1) - MUL (W1, d0); \
+} while (0)
+
+/* radix-2 ifft butterfly with bias: same products as BUTTERFLY_0, each result passed through BIAS */
+
+#define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \
+    t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \
+    t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \
+} while (0)
+
+/* the basic split-radix ifft butterfly; expects tmp1..tmp8 to be declared by the enclosing function */
+
+#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
+    BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \
+    BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \
+    tmp1 = tmp5 + tmp7; \
+    tmp2 = tmp6 + tmp8; \
+    tmp3 = tmp6 - tmp8; \
+    tmp4 = tmp7 - tmp5; \
+    a2.real = a0.real - tmp1; \
+    a2.imag = a0.imag - tmp2; \
+    a3.real = a1.real - tmp3; \
+    a3.imag = a1.imag - tmp4; \
+    a0.real += tmp1; \
+    a0.imag += tmp2; \
+    a1.real += tmp3; \
+    a1.imag += tmp4; \
+} while (0)
+
+/* split-radix ifft butterfly, specialized for wr=1 wi=0 */
+
+#define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
+    tmp1 = a2.real + a3.real; \
+    tmp2 = a2.imag + a3.imag; \
+    tmp3 = a2.imag - a3.imag; \
+    tmp4 = a3.real - a2.real; \
+    a2.real = a0.real - tmp1; \
+    a2.imag = a0.imag - tmp2; \
+    a3.real = a1.real - tmp3; \
+    a3.imag = a1.imag - tmp4; \
+    a0.real += tmp1; \
+    a0.imag += tmp2; \
+    a1.real += tmp3; \
+    a1.imag += tmp4; \
+} while (0)
+
+/* split-radix ifft butterfly, specialized for wr=wi */
+
+#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
+    tmp5 = MUL (a2.real + a2.imag, w); \
+    tmp6 = MUL (a2.imag - a2.real, w); \
+    tmp7 = MUL (a3.real - a3.imag, w); \
+    tmp8 = MUL (a3.imag + a3.real, w); \
+    tmp1 = tmp5 + tmp7; \
+    tmp2 = tmp6 + tmp8; \
+    tmp3 = tmp6 - tmp8; \
+    tmp4 = tmp7 - tmp5; \
+    a2.real = a0.real - tmp1; \
+    a2.imag = a0.imag - tmp2; \
+    a3.real = a1.real - tmp3; \
+    a3.imag = a1.imag - tmp4; \
+    a0.real += tmp1; \
+    a0.imag += tmp2; \
+    a1.real += tmp3; \
+    a1.imag += tmp4; \
+} while (0)
+/* 8-point transform in place: a 4-point and two 2-point transforms combined by two butterflies; roots16[1] is cos (pi / 4). */
+static inline void ifft8 (complex_t * buf)
+{
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+
+    ifft4 (buf);
+    ifft2 (buf + 4);
+    ifft2 (buf + 6);
+    BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
+    BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
+}
+/* Combining pass over four consecutive quarters of n elements each: element 0 uses BUTTERFLY_ZERO, the other n - 1 use BUTTERFLY with weights read from `weight`. */
+static void ifft_pass (complex_t * buf, sample_t * weight, int n)
+{
+    complex_t * buf1;
+    complex_t * buf2;
+    complex_t * buf3;
+    sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+    int i;
+
+    buf++;
+    buf1 = buf + n;
+    buf2 = buf + 2 * n;
+    buf3 = buf + 3 * n;
+
+    BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
+
+    i = n - 1;
+
+    do {
+        BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0],
+                   weight[0], weight[2*i-n]);
+        buf++;
+        buf1++;
+        buf2++;
+        buf3++;
+        weight++;
+    } while (--i);
+}
+/* 16-point transform in place: 8 + 4 + 4 points, then one combining pass. */
+static void ifft16 (complex_t * buf)
+{
+    ifft8 (buf);
+    ifft4 (buf + 8);
+    ifft4 (buf + 12);
+    ifft_pass (buf, roots16, 4);
+}
+/* 32-point transform in place: 16 + 8 + 8 points, then one combining pass. */
+static void ifft32 (complex_t * buf)
+{
+    ifft16 (buf);
+    ifft8 (buf + 16);
+    ifft8 (buf + 24);
+    ifft_pass (buf, roots32, 8);
+}
+/* 64-point transform in place: 32 + 16 + 16 points, then one combining pass. */
+static void ifft64_c (complex_t * buf)
+{
+    ifft32 (buf);
+    ifft16 (buf + 32);
+    ifft16 (buf + 48);
+    ifft_pass (buf, roots64, 16);
+}
+/* 128-point transform in place: the first four statements repeat the body of ifft64_c on the first half, then 32 + 32 points and the final pass. */
+static void ifft128_c (complex_t * buf)
+{
+    ifft32 (buf);
+    ifft16 (buf + 32);
+    ifft16 (buf + 48);
+    ifft_pass (buf, roots64, 16);
+
+    ifft32 (buf + 64);
+    ifft32 (buf + 96);
+    ifft_pass (buf, roots128, 32);
+}
+/* 512-point IMDCT: reads data[0..255], overwrites data[0..255] with windowed output combined with delay[0..127], and stores new values in delay[0..127].  `bias` is not named in the body; presumably the BIAS macro uses it - confirm in a52_internal.h. */
+void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
+{
+    int i, k;
+    sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
+    const sample_t * window = a52_imdct_window;
+    complex_t buf[128];
+    /* Pre IFFT complex multiply, reading the input through fftorder */
+    for (i = 0; i < 128; i++) {
+        k = fftorder[i];
+        t_r = pre1[i].real;
+        t_i = pre1[i].imag;
+        BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]);
+    }
+
+    ifft128 (buf);
+
+    /* Post IFFT complex multiply plus IFFT complex conjugate */
+    /* Window and convert to real valued signal */
+    for (i = 0; i < 64; i++) {
+        /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
+        t_r = post1[i].real;
+        t_i = post1[i].imag;
+        BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real);
+        BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real);
+
+        w_1 = window[2*i];
+        w_2 = window[255-2*i];
+        BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
+        delay[2*i] = a_i;
+
+        w_1 = window[2*i+1];
+        w_2 = window[254-2*i];
+        BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]);
+        delay[2*i+1] = b_i;
+    }
+}
+/* IMDCT built from two 64-point transforms: even-indexed inputs go through buf1, odd-indexed ones through buf2; data[0..255] is overwritten and delay[0..127] is consumed and refilled.  `bias`: same remark as for a52_imdct_512. */
+void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias)
+{
+    int i, k;
+    sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
+    const sample_t * window = a52_imdct_window;
+    complex_t buf1[64], buf2[64];
+
+    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+    for (i = 0; i < 64; i++) {
+        k = fftorder[i];
+        t_r = pre2[i].real;
+        t_i = pre2[i].imag;
+        BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]);
+        BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]);
+    }
+
+    ifft64 (buf1);
+    ifft64 (buf2);
+
+    /* Post IFFT complex multiply */
+    /* Window and convert to real valued signal */
+    for (i = 0; i < 32; i++) {
+        /* y1[n] = z1[n] * (xcos2[n] + j * xsin2[n]) ; */
+        t_r = post2[i].real;
+        t_i = post2[i].imag;
+        BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real);
+        BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real);
+        BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real);
+        BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real);
+
+        w_1 = window[2*i];
+        w_2 = window[255-2*i];
+        BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]);
+        delay[2*i] = c_i;
+
+        w_1 = window[128+2*i];
+        w_2 = window[127-2*i];
+        BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]);
+        delay[127-2*i] = c_r;
+
+        w_1 = window[2*i+1];
+        w_2 = window[254-2*i];
+        BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]);
+        delay[2*i+1] = d_r;
+
+        w_1 = window[129+2*i];
+        w_2 = window[126-2*i];
+        BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]);
+        delay[126-2*i] = d_i;
+    }
+}
+/* Horner evaluation of the series sum over k = 0..100 of x^k / (k!)^2. */
+static double besselI0 (double x)
+{
+    double bessel = 1;
+    int i = 100;
+
+    do
+        bessel = bessel * x / (i * i) + 1;
+    while (--i);
+    return bessel;
+}
+/* Fill the window, root and twiddle tables above and select the ifft128/ifft64 implementations; the transforms call through those pointers, so this has to run before them (a52_init in parse.c calls it). */
+void a52_imdct_init (uint32_t mm_accel)
+{
+    int i, k;
+    double sum;
+    double local_imdct_window[256];
+
+    /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
+    sum = 0;
+    for (i = 0; i < 256; i++) {
+        sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
+        local_imdct_window[i] = sum;
+    }
+    sum++;
+    for (i = 0; i < 256; i++)
+        a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum));
+
+    for (i = 0; i < 3; i++)
+        roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1)));
+
+    for (i = 0; i < 7; i++)
+        roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1)));
+
+    for (i = 0; i < 15; i++)
+        roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1)));
+
+    for (i = 0; i < 31; i++)
+        roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1)));
+
+    for (i = 0; i < 64; i++) {
+        k = fftorder[i] / 2 + 64;
+        pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25)));
+        pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25)));
+    }
+    /* the second half of pre1 uses the same formula with both signs flipped */
+    for (i = 64; i < 128; i++) {
+        k = fftorder[i] / 2 + 64;
+        pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25)));
+        pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25)));
+    }
+
+    for (i = 0; i < 64; i++) {
+        post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5)));
+        post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5)));
+    }
+
+    for (i = 0; i < 64; i++) {
+        k = fftorder[i] / 4;
+        pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25)));
+        pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25)));
+    }
+
+    for (i = 0; i < 32; i++) {
+        post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5)));
+        post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5)));
+    }
+
+#ifdef LIBA52_DJBFFT
+    if (mm_accel & MM_ACCEL_DJBFFT) {
+        ifft128 = (void (*) (complex_t *)) fftc4_un128;
+        ifft64 = (void (*) (complex_t *)) fftc4_un64;
+    } else
+#endif
+    {
+        ifft128 = ifft128_c;
+        ifft64 = ifft64_c;
+    }
+}
diff --git a/mpeg4/src/libavcodec/liba52/mm_accel.h b/mpeg4/src/libavcodec/liba52/mm_accel.h
new file mode 100644
index 0000000000000000000000000000000000000000..9a475f5a2a69986b22651eb9001ca79deada8a91
--- /dev/null
+++ b/mpeg4/src/libavcodec/liba52/mm_accel.h
@@ -0,0 +1,42 @@
+/*
+ * mm_accel.h
+ * Copyright (C) 2000-2002 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef MM_ACCEL_H
+#define MM_ACCEL_H
+/* Acceleration flags, one bit each, meant to be OR-ed into a uint32_t mask. */
+/* generic accelerations (a52_imdct_init tests MM_ACCEL_DJBFFT when LIBA52_DJBFFT is defined) */
+#define MM_ACCEL_DJBFFT 0x00000001
+
+/* x86 accelerations */
+#define MM_ACCEL_X86_MMX 0x80000000
+#define MM_ACCEL_X86_3DNOW 0x40000000
+#define MM_ACCEL_X86_MMXEXT 0x20000000
+#define MM_ACCEL_X86_SSE 0x10000000
+#define MM_ACCEL_X86_3DNOWEXT 0x08000000
+
+/* PPC accelerations */
+#define MM_ACCEL_PPC_ALTIVEC 0x00010000
+/* Declared here, defined elsewhere; presumably returns a mask of the flags above - confirm against its definition.  uint32_t must already be declared by the including file. */
+uint32_t mm_accel (void);
+
+#endif /* MM_ACCEL_H */
diff --git a/mpeg4/src/libavcodec/liba52/parse.c b/mpeg4/src/libavcodec/liba52/parse.c
new file mode 100644
index 0000000000000000000000000000000000000000..5a0701564170a9d767afcd37014c0a92edf1b530
--- /dev/null
+++ b/mpeg4/src/libavcodec/liba52/parse.c
@@ -0,0 +1,939 @@
+/*
+ * parse.c
+ * Copyright (C) 2000-2003 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of a52dec, a free ATSC A-52 stream decoder.
+ * See http://liba52.sourceforge.net/ for updates.
+ *
+ * a52dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * a52dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "a52.h" +#include "a52_internal.h" +#include "bitstream.h" +#include "tables.h" + +#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) +/* some systems have memalign() but no declaration for it */ +void * memalign (size_t align, size_t size); +#else +/* assume malloc alignment is sufficient */ +#define memalign(align,size) malloc (size) +#endif + +typedef struct { + quantizer_t q1[2]; + quantizer_t q2[2]; + quantizer_t q4; + int q1_ptr; + int q2_ptr; + int q4_ptr; +} quantizer_set_t; + +static uint8_t halfrate[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3}; + +a52_state_t * a52_init (uint32_t mm_accel) +{ + a52_state_t * state; + int i; + + state = (a52_state_t *) malloc (sizeof (a52_state_t)); + if (state == NULL) + return NULL; + + state->samples = (sample_t *) memalign (16, 256 * 12 * sizeof (sample_t)); + if (state->samples == NULL) { + free (state); + return NULL; + } + + for (i = 0; i < 256 * 12; i++) + state->samples[i] = 0; + + state->downmixed = 1; + + state->lfsr_state = 1; + + a52_imdct_init (mm_accel); + + return state; +} + +sample_t * a52_samples (a52_state_t * state) +{ + return state->samples; +} + +int a52_syncinfo (uint8_t * buf, int * flags, + int * sample_rate, int * bit_rate) +{ + static int rate[] = { 32, 40, 48, 56, 64, 80, 96, 112, + 128, 160, 192, 224, 256, 320, 384, 448, + 512, 576, 640}; + static uint8_t lfeon[8] = {0x10, 0x10, 0x04, 0x04, 0x04, 0x01, 0x04, 0x01}; + int frmsizecod; + int bitrate; + int half; + int acmod; + + if ((buf[0] != 0x0b) || (buf[1] != 0x77)) /* syncword */ + return 0; + + if (buf[5] >= 0x60) /* bsid >= 12 */ + return 0; + half = halfrate[buf[5] >> 3]; + + /* acmod, dsurmod and lfeon */ + acmod = buf[6] >> 5; + *flags = ((((buf[6] & 0xf8) == 0x50) ? 
A52_DOLBY : acmod) | + ((buf[6] & lfeon[acmod]) ? A52_LFE : 0)); + + frmsizecod = buf[4] & 63; + if (frmsizecod >= 38) + return 0; + bitrate = rate [frmsizecod >> 1]; + *bit_rate = (bitrate * 1000) >> half; + + switch (buf[4] & 0xc0) { + case 0: + *sample_rate = 48000 >> half; + return 4 * bitrate; + case 0x40: + *sample_rate = 44100 >> half; + return 2 * (320 * bitrate / 147 + (frmsizecod & 1)); + case 0x80: + *sample_rate = 32000 >> half; + return 6 * bitrate; + default: + return 0; + } +} + +int a52_frame (a52_state_t * state, uint8_t * buf, int * flags, + level_t * level, sample_t bias) +{ + static level_t clev[4] = { LEVEL (LEVEL_3DB), LEVEL (LEVEL_45DB), + LEVEL (LEVEL_6DB), LEVEL (LEVEL_45DB) }; + static level_t slev[4] = { LEVEL (LEVEL_3DB), LEVEL (LEVEL_6DB), + 0, LEVEL (LEVEL_6DB) }; + int chaninfo; + int acmod; + + state->fscod = buf[4] >> 6; + state->halfrate = halfrate[buf[5] >> 3]; + state->acmod = acmod = buf[6] >> 5; + + a52_bitstream_set_ptr (state, buf + 6); + bitstream_get (state, 3); /* skip acmod we already parsed */ + + if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */ + acmod = A52_DOLBY; + + state->clev = state->slev = 0; + + if ((acmod & 1) && (acmod != 1)) + state->clev = clev[bitstream_get (state, 2)]; /* cmixlev */ + + if (acmod & 4) + state->slev = slev[bitstream_get (state, 2)]; /* surmixlev */ + + state->lfeon = bitstream_get (state, 1); + + state->output = a52_downmix_init (acmod, *flags, level, + state->clev, state->slev); + if (state->output < 0) + return 1; + if (state->lfeon && (*flags & A52_LFE)) + state->output |= A52_LFE; + *flags = state->output; + /* the 2* compensates for differences in imdct */ + state->dynrng = state->level = MUL_C (*level, 2); + state->bias = bias; + state->dynrnge = 1; + state->dynrngcall = NULL; + state->cplba.deltbae = DELTA_BIT_NONE; + state->ba[0].deltbae = state->ba[1].deltbae = state->ba[2].deltbae = + state->ba[3].deltbae = state->ba[4].deltbae = DELTA_BIT_NONE; + + chaninfo = 
!acmod; + do { + bitstream_get (state, 5); /* dialnorm */ + if (bitstream_get (state, 1)) /* compre */ + bitstream_get (state, 8); /* compr */ + if (bitstream_get (state, 1)) /* langcode */ + bitstream_get (state, 8); /* langcod */ + if (bitstream_get (state, 1)) /* audprodie */ + bitstream_get (state, 7); /* mixlevel + roomtyp */ + } while (chaninfo--); + + bitstream_get (state, 2); /* copyrightb + origbs */ + + if (bitstream_get (state, 1)) /* timecod1e */ + bitstream_get (state, 14); /* timecod1 */ + if (bitstream_get (state, 1)) /* timecod2e */ + bitstream_get (state, 14); /* timecod2 */ + + if (bitstream_get (state, 1)) { /* addbsie */ + int addbsil; + + addbsil = bitstream_get (state, 6); + do { + bitstream_get (state, 8); /* addbsi */ + } while (addbsil--); + } + + return 0; +} + +void a52_dynrng (a52_state_t * state, + level_t (* call) (level_t, void *), void * data) +{ + state->dynrnge = 0; + if (call) { + state->dynrnge = 1; + state->dynrngcall = call; + state->dynrngdata = data; + } +} + +static int parse_exponents (a52_state_t * state, int expstr, int ngrps, + uint8_t exponent, uint8_t * dest) +{ + int exps; + + while (ngrps--) { + exps = bitstream_get (state, 7); + + exponent += exp_1[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + + exponent += exp_2[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + + exponent += exp_3[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + } + + return 0; +} + +static int parse_deltba (a52_state_t * state, int8_t * deltba) +{ + int deltnseg, 
deltlen, delta, j; + + memset (deltba, 0, 50); + + deltnseg = bitstream_get (state, 3); + j = 0; + do { + j += bitstream_get (state, 5); + deltlen = bitstream_get (state, 4); + delta = bitstream_get (state, 3); + delta -= (delta >= 4) ? 3 : 4; + if (!deltlen) + continue; + if (j + deltlen >= 50) + return 1; + while (deltlen--) + deltba[j++] = delta; + } while (deltnseg--); + + return 0; +} + +static inline int zero_snr_offsets (int nfchans, a52_state_t * state) +{ + int i; + + if ((state->csnroffst) || + (state->chincpl && state->cplba.bai >> 3) || /* cplinu, fsnroffst */ + (state->lfeon && state->lfeba.bai >> 3)) /* fsnroffst */ + return 0; + for (i = 0; i < nfchans; i++) + if (state->ba[i].bai >> 3) /* fsnroffst */ + return 0; + return 1; +} + +static inline int16_t dither_gen (a52_state_t * state) +{ + int16_t nstate; + + nstate = dither_lut[state->lfsr_state >> 8] ^ (state->lfsr_state << 8); + + state->lfsr_state = (uint16_t) nstate; + + return (3 * nstate) >> 2; +} + +#ifndef LIBA52_FIXED +#define COEFF(c,t,l,s,e) (c) = (t) * (s)[e] +#else +#define COEFF(c,_t,_l,s,e) do { \ + quantizer_t t = (_t); \ + level_t l = (_l); \ + int shift = e - 5; \ + sample_t tmp = t * (l >> 16) + ((t * (l & 0xffff)) >> 16); \ + if (shift >= 0) \ + (c) = tmp >> shift; \ + else \ + (c) = tmp << -shift; \ +} while (0) +#endif + +static void coeff_get (a52_state_t * state, sample_t * coeff, + expbap_t * expbap, quantizer_set_t * quant, + level_t level, int dither, int end) +{ + int i; + uint8_t * exp; + int8_t * bap; + +#ifndef LIBA52_FIXED + sample_t factor[25]; + + for (i = 0; i <= 24; i++) + factor[i] = scale_factor[i] * level; +#endif + + exp = expbap->exp; + bap = expbap->bap; + + for (i = 0; i < end; i++) { + int bapi; + + bapi = bap[i]; + switch (bapi) { + case 0: + if (dither) { + COEFF (coeff[i], dither_gen (state), level, factor, exp[i]); + continue; + } else { + coeff[i] = 0; + continue; + } + + case -1: + if (quant->q1_ptr >= 0) { + COEFF (coeff[i], 
/*
 * Decode the mantissas of the coupling channel and write the resulting
 * coefficients into every channel that takes part in coupling.
 *
 * state    - decoder state; supplies the coupling exponents/bap arrays,
 *            the coupling range (cplstrtmant .. cplendmant), the band
 *            structure bits and the per-channel coupling coordinates
 * nfchans  - number of channels to consider
 * coeff    - per-channel level, multiplied into the coupling coordinate
 * samples  - output, indexed as samples[channel][mantissa]
 * quant    - cache of grouped quantizer values that have been read from
 *            the bitstream but not consumed yet
 * dithflag - per-channel flag: use dither (non-zero) or zero for
 *            mantissas whose bap is 0
 *
 * Only channels whose bit is set in state->chincpl are written.
 */
static void coeff_get_coupling (a52_state_t * state, int nfchans,
				level_t * coeff, sample_t (* samples)[256],
				quantizer_set_t * quant, uint8_t dithflag[5])
{
    int cplbndstrc, bnd, i, i_end, ch;
    uint8_t * exp;
    int8_t * bap;
    level_t cplco[5];

    exp = state->cpl_expbap.exp;
    bap = state->cpl_expbap.bap;
    bnd = 0;
    cplbndstrc = state->cplbndstrc;
    i = state->cplstrtmant;
    while (i < state->cplendmant) {
	/* a band starts with 12 mantissas and grows by 12 for every
	 * consecutive set low bit of cplbndstrc; the bits are consumed
	 * as they are tested, plus one terminating bit per band */
	i_end = i + 12;
	while (cplbndstrc & 1) {
	    cplbndstrc >>= 1;
	    i_end += 12;
	}
	cplbndstrc >>= 1;
	/* per-channel factor for this band */
	for (ch = 0; ch < nfchans; ch++)
	    cplco[ch] = MUL_L (state->cplco[ch][bnd], coeff[ch]);
	bnd++;

	while (i < i_end) {
	    quantizer_t cplcoeff;
	    int bapi;

	    bapi = bap[i];
	    switch (bapi) {
	    case 0:
		/* nothing is read from the bitstream: each coupled channel
		 * gets its own dither value or zero, then move on */
		for (ch = 0; ch < nfchans; ch++)
		    if ((state->chincpl >> ch) & 1) {
			if (dithflag[ch])
#ifndef LIBA52_FIXED
			    samples[ch][i] = (scale_factor[exp[i]] *
					      cplco[ch] * dither_gen (state));
#else
			    COEFF (samples[ch][i], dither_gen (state),
				   cplco[ch], scale_factor, exp[i]);
#endif
			else
			    samples[ch][i] = 0;
		    }
		i++;
		continue;

	    case -1:
		/* one 5-bit code carries three values: use one now and
		 * cache the other two in quant->q1 */
		if (quant->q1_ptr >= 0) {
		    cplcoeff = quant->q1[quant->q1_ptr--];
		    break;
		} else {
		    int code;

		    code = bitstream_get (state, 5);

		    quant->q1_ptr = 1;
		    quant->q1[0] = q_1_2[code];
		    quant->q1[1] = q_1_1[code];
		    cplcoeff = q_1_0[code];
		    break;
		}

	    case -2:
		/* one 7-bit code carries three values: use one now and
		 * cache the other two in quant->q2 */
		if (quant->q2_ptr >= 0) {
		    cplcoeff = quant->q2[quant->q2_ptr--];
		    break;
		} else {
		    int code;

		    code = bitstream_get (state, 7);

		    quant->q2_ptr = 1;
		    quant->q2[0] = q_2_2[code];
		    quant->q2[1] = q_2_1[code];
		    cplcoeff = q_2_0[code];
		    break;
		}

	    case 3:
		cplcoeff = q_3[bitstream_get (state, 3)];
		break;

	    case -3:
		/* one 7-bit code carries two values: use one now and
		 * cache the other in quant->q4 */
		if (quant->q4_ptr == 0) {
		    quant->q4_ptr = -1;
		    cplcoeff = quant->q4;
		    break;
		} else {
		    int code;

		    code = bitstream_get (state, 7);

		    quant->q4_ptr = 0;
		    quant->q4 = q_4_1[code];
		    cplcoeff = q_4_0[code];
		    break;
		}

	    case 4:
		cplcoeff = q_5[bitstream_get (state, 4)];
		break;

	    default:
		/* bapi bits are read and shifted up so the value
		 * occupies the top of a 16-bit range */
		cplcoeff = bitstream_get_2 (state, bapi) << (16 - bapi);
	    }
#ifndef LIBA52_FIXED
	    /* apply the exponent once, before fanning out to the channels */
	    cplcoeff *= scale_factor[exp[i]];
#endif
	    for (ch = 0; ch < nfchans; ch++)
	        if ((state->chincpl >> ch) & 1)
#ifndef LIBA52_FIXED
		    samples[ch][i] = cplcoeff * cplco[ch];
#else
		    COEFF (samples[ch][i], cplcoeff, cplco[ch],
			   scale_factor, exp[i]);
#endif
	    i++;
	}
    }
}
samples; + + nfchans = nfchans_tbl[state->acmod]; + + for (i = 0; i < nfchans; i++) + blksw[i] = bitstream_get (state, 1); + + for (i = 0; i < nfchans; i++) + dithflag[i] = bitstream_get (state, 1); + + chaninfo = !state->acmod; + do { + if (bitstream_get (state, 1)) { /* dynrnge */ + int dynrng; + + dynrng = bitstream_get_2 (state, 8); + if (state->dynrnge) { + level_t range; + +#if !defined(LIBA52_FIXED) + range = ((((dynrng & 0x1f) | 0x20) << 13) * + scale_factor[3 - (dynrng >> 5)]); +#else + range = ((dynrng & 0x1f) | 0x20) << (21 + (dynrng >> 5)); +#endif + if (state->dynrngcall) + range = state->dynrngcall (range, state->dynrngdata); + state->dynrng = MUL_L (state->level, range); + } + } + } while (chaninfo--); + + if (bitstream_get (state, 1)) { /* cplstre */ + state->chincpl = 0; + if (bitstream_get (state, 1)) { /* cplinu */ + static uint8_t bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44, + 45, 45, 46, 46, 47, 47, 48, 48}; + int cplbegf; + int cplendf; + int ncplsubnd; + + for (i = 0; i < nfchans; i++) + state->chincpl |= bitstream_get (state, 1) << i; + switch (state->acmod) { + case 0: case 1: + return 1; + case 2: + state->phsflginu = bitstream_get (state, 1); + } + cplbegf = bitstream_get (state, 4); + cplendf = bitstream_get (state, 4); + + if (cplendf + 3 - cplbegf < 0) + return 1; + state->ncplbnd = ncplsubnd = cplendf + 3 - cplbegf; + state->cplstrtbnd = bndtab[cplbegf]; + state->cplstrtmant = cplbegf * 12 + 37; + state->cplendmant = cplendf * 12 + 73; + + state->cplbndstrc = 0; + for (i = 0; i < ncplsubnd - 1; i++) + if (bitstream_get (state, 1)) { + state->cplbndstrc |= 1 << i; + state->ncplbnd--; + } + } + } + + if (state->chincpl) { /* cplinu */ + int j, cplcoe; + + cplcoe = 0; + for (i = 0; i < nfchans; i++) + if ((state->chincpl) >> i & 1) + if (bitstream_get (state, 1)) { /* cplcoe */ + int mstrcplco, cplcoexp, cplcomant; + + cplcoe = 1; + mstrcplco = 3 * bitstream_get (state, 2); + for (j = 0; j < state->ncplbnd; j++) { + cplcoexp = 
bitstream_get (state, 4); + cplcomant = bitstream_get (state, 4); + if (cplcoexp == 15) + cplcomant <<= 14; + else + cplcomant = (cplcomant | 0x10) << 13; +#ifndef LIBA52_FIXED + state->cplco[i][j] = + cplcomant * scale_factor[cplcoexp + mstrcplco]; +#else + state->cplco[i][j] = (cplcomant << 11) >> (cplcoexp + mstrcplco); +#endif + + } + } + if ((state->acmod == 2) && state->phsflginu && cplcoe) + for (j = 0; j < state->ncplbnd; j++) + if (bitstream_get (state, 1)) /* phsflg */ + state->cplco[1][j] = -state->cplco[1][j]; + } + + if ((state->acmod == 2) && (bitstream_get (state, 1))) { /* rematstr */ + int end; + + state->rematflg = 0; + end = (state->chincpl) ? state->cplstrtmant : 253; /* cplinu */ + i = 0; + do + state->rematflg |= bitstream_get (state, 1) << i; + while (rematrix_band[i++] < end); + } + + cplexpstr = EXP_REUSE; + lfeexpstr = EXP_REUSE; + if (state->chincpl) /* cplinu */ + cplexpstr = bitstream_get (state, 2); + for (i = 0; i < nfchans; i++) + chexpstr[i] = bitstream_get (state, 2); + if (state->lfeon) + lfeexpstr = bitstream_get (state, 1); + + for (i = 0; i < nfchans; i++) + if (chexpstr[i] != EXP_REUSE) { + if ((state->chincpl >> i) & 1) + state->endmant[i] = state->cplstrtmant; + else { + int chbwcod; + + chbwcod = bitstream_get (state, 6); + if (chbwcod > 60) + return 1; + state->endmant[i] = chbwcod * 3 + 73; + } + } + + do_bit_alloc = 0; + + if (cplexpstr != EXP_REUSE) { + int cplabsexp, ncplgrps; + + do_bit_alloc = 64; + ncplgrps = ((state->cplendmant - state->cplstrtmant) / + (3 << (cplexpstr - 1))); + cplabsexp = bitstream_get (state, 4) << 1; + if (parse_exponents (state, cplexpstr, ncplgrps, cplabsexp, + state->cpl_expbap.exp + state->cplstrtmant)) + return 1; + } + for (i = 0; i < nfchans; i++) + if (chexpstr[i] != EXP_REUSE) { + int grp_size, nchgrps; + + do_bit_alloc |= 1 << i; + grp_size = 3 << (chexpstr[i] - 1); + nchgrps = (state->endmant[i] + grp_size - 4) / grp_size; + state->fbw_expbap[i].exp[0] = bitstream_get (state, 4); + 
if (parse_exponents (state, chexpstr[i], nchgrps, + state->fbw_expbap[i].exp[0], + state->fbw_expbap[i].exp + 1)) + return 1; + bitstream_get (state, 2); /* gainrng */ + } + if (lfeexpstr != EXP_REUSE) { + do_bit_alloc |= 32; + state->lfe_expbap.exp[0] = bitstream_get (state, 4); + if (parse_exponents (state, lfeexpstr, 2, state->lfe_expbap.exp[0], + state->lfe_expbap.exp + 1)) + return 1; + } + + if (bitstream_get (state, 1)) { /* baie */ + do_bit_alloc = 127; + state->bai = bitstream_get (state, 11); + } + if (bitstream_get (state, 1)) { /* snroffste */ + do_bit_alloc = 127; + state->csnroffst = bitstream_get (state, 6); + if (state->chincpl) /* cplinu */ + state->cplba.bai = bitstream_get (state, 7); + for (i = 0; i < nfchans; i++) + state->ba[i].bai = bitstream_get (state, 7); + if (state->lfeon) + state->lfeba.bai = bitstream_get (state, 7); + } + if ((state->chincpl) && (bitstream_get (state, 1))) { /* cplleake */ + do_bit_alloc |= 64; + state->cplfleak = 9 - bitstream_get (state, 3); + state->cplsleak = 9 - bitstream_get (state, 3); + } + + if (bitstream_get (state, 1)) { /* deltbaie */ + do_bit_alloc = 127; + if (state->chincpl) /* cplinu */ + state->cplba.deltbae = bitstream_get (state, 2); + for (i = 0; i < nfchans; i++) + state->ba[i].deltbae = bitstream_get (state, 2); + if (state->chincpl && /* cplinu */ + (state->cplba.deltbae == DELTA_BIT_NEW) && + parse_deltba (state, state->cplba.deltba)) + return 1; + for (i = 0; i < nfchans; i++) + if ((state->ba[i].deltbae == DELTA_BIT_NEW) && + parse_deltba (state, state->ba[i].deltba)) + return 1; + } + + if (do_bit_alloc) { + if (zero_snr_offsets (nfchans, state)) { + memset (state->cpl_expbap.bap, 0, sizeof (state->cpl_expbap.bap)); + for (i = 0; i < nfchans; i++) + memset (state->fbw_expbap[i].bap, 0, + sizeof (state->fbw_expbap[i].bap)); + memset (state->lfe_expbap.bap, 0, sizeof (state->lfe_expbap.bap)); + } else { + if (state->chincpl && (do_bit_alloc & 64)) /* cplinu */ + a52_bit_allocate (state, 
&state->cplba, state->cplstrtbnd, + state->cplstrtmant, state->cplendmant, + state->cplfleak << 8, state->cplsleak << 8, + &state->cpl_expbap); + for (i = 0; i < nfchans; i++) + if (do_bit_alloc & (1 << i)) + a52_bit_allocate (state, state->ba + i, 0, 0, + state->endmant[i], 0, 0, + state->fbw_expbap +i); + if (state->lfeon && (do_bit_alloc & 32)) { + state->lfeba.deltbae = DELTA_BIT_NONE; + a52_bit_allocate (state, &state->lfeba, 0, 0, 7, 0, 0, + &state->lfe_expbap); + } + } + } + + if (bitstream_get (state, 1)) { /* skiple */ + i = bitstream_get (state, 9); /* skipl */ + while (i--) + bitstream_get (state, 8); + } + + samples = state->samples; + if (state->output & A52_LFE) + samples += 256; /* shift for LFE channel */ + + chanbias = a52_downmix_coeff (coeff, state->acmod, state->output, + state->dynrng, state->clev, state->slev); + + quant.q1_ptr = quant.q2_ptr = quant.q4_ptr = -1; + done_cpl = 0; + + for (i = 0; i < nfchans; i++) { + int j; + + coeff_get (state, samples + 256 * i, state->fbw_expbap +i, &quant, + coeff[i], dithflag[i], state->endmant[i]); + + if ((state->chincpl >> i) & 1) { + if (!done_cpl) { + done_cpl = 1; + coeff_get_coupling (state, nfchans, coeff, + (sample_t (*)[256])samples, &quant, + dithflag); + } + j = state->cplendmant; + } else + j = state->endmant[i]; + do + (samples + 256 * i)[j] = 0; + while (++j < 256); + } + + if (state->acmod == 2) { + int j, end, band, rematflg; + + end = ((state->endmant[0] < state->endmant[1]) ? + state->endmant[0] : state->endmant[1]); + + i = 0; + j = 13; + rematflg = state->rematflg; + do { + if (! 
(rematflg & 1)) { + rematflg >>= 1; + j = rematrix_band[i++]; + continue; + } + rematflg >>= 1; + band = rematrix_band[i++]; + if (band > end) + band = end; + do { + sample_t tmp0, tmp1; + + tmp0 = samples[j]; + tmp1 = (samples+256)[j]; + samples[j] = tmp0 + tmp1; + (samples+256)[j] = tmp0 - tmp1; + } while (++j < band); + } while (j < end); + } + + if (state->lfeon) { + if (state->output & A52_LFE) { + coeff_get (state, samples - 256, &state->lfe_expbap, &quant, + state->dynrng, 0, 7); + for (i = 7; i < 256; i++) + (samples-256)[i] = 0; + a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias); + } else { + /* just skip the LFE coefficients */ + coeff_get (state, samples + 1280, &state->lfe_expbap, &quant, + 0, 0, 7); + } + } + + i = 0; + if (nfchans_tbl[state->output & A52_CHANNEL_MASK] < nfchans) + for (i = 1; i < nfchans; i++) + if (blksw[i] != blksw[0]) + break; + + if (i < nfchans) { + if (state->downmixed) { + state->downmixed = 0; + a52_upmix (samples + 1536, state->acmod, state->output); + } + + for (i = 0; i < nfchans; i++) { + sample_t bias; + + bias = 0; + if (!(chanbias & (1 << i))) + bias = state->bias; + + if (coeff[i]) { + if (blksw[i]) + a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, + bias); + else + a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i, + bias); + } else { + int j; + + for (j = 0; j < 256; j++) + (samples + 256 * i)[j] = bias; + } + } + + a52_downmix (samples, state->acmod, state->output, state->bias, + state->clev, state->slev); + } else { + nfchans = nfchans_tbl[state->output & A52_CHANNEL_MASK]; + + a52_downmix (samples, state->acmod, state->output, 0, + state->clev, state->slev); + + if (!state->downmixed) { + state->downmixed = 1; + a52_downmix (samples + 1536, state->acmod, state->output, 0, + state->clev, state->slev); + } + + if (blksw[0]) + for (i = 0; i < nfchans; i++) + a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i, + state->bias); + else + for (i = 0; i < nfchans; i++) + 
// this code is based on a52dec/libao/audio_out_oss.c

/*
 * The converters below do not look at the numeric value of a float
 * sample but at its 32-bit storage pattern: pattern 0x43c00000 maps to
 * 0 and each step of the pattern is one step of the 16-bit result.
 * (0x43c00000 is the IEEE-754 single pattern of 384.0, so the input is
 * expected to carry that bias.)
 *
 * Every converter takes a planar input (consecutive runs of 256 samples
 * per channel), writes 256 interleaved output frames and returns the
 * number of int16_t values written.
 */

/* Map a biased 32-bit sample pattern to a saturated signed 16-bit sample. */
static inline int16_t convert (int32_t i)
{
    if (i > 0x43c07fff)
	return 32767;
    else if (i < 0x43bf8000)
	return -32768;
    else
	return i - 0x43c00000;
}

/*
 * Fetch f[idx] and convert it.
 *
 * The storage pattern is obtained through a union rather than by
 * dereferencing an (int32_t *) cast of the float pointer: reading float
 * objects through an int32_t lvalue breaks the aliasing rules and an
 * optimizing compiler is free to miscompile it.
 */
static inline int16_t a52_f2s16 (const float * f, int idx)
{
    union { float f; int32_t i; } pun;

    pun.f = f[idx];
    return convert (pun.i);
}

/* input plane 0 -> output slot 4, slots 0-3 silent */
static int a52_resample_MONO_to_5_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0;
	s16[5*i+4] = a52_f2s16 (_f, i);
    }
    return 5*256;
}

/* input plane 0 -> single output channel */
static int a52_resample_MONO_to_1_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[i] = a52_f2s16 (_f, i);
    }
    return 1*256;
}

/* input planes 0,1 -> output slots 0,1 */
static int a52_resample_STEREO_to_2_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[2*i] = a52_f2s16 (_f, i);
	s16[2*i+1] = a52_f2s16 (_f, i+256);
    }
    return 2*256;
}

/* input planes 0,2,1 -> output slots 0,1,4; slots 2,3 silent */
static int a52_resample_3F_to_5_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[5*i] = a52_f2s16 (_f, i);
	s16[5*i+1] = a52_f2s16 (_f, i+512);
	s16[5*i+2] = s16[5*i+3] = 0;
	s16[5*i+4] = a52_f2s16 (_f, i+256);
    }
    return 5*256;
}

/* input planes 0,1,2,3 -> output slots 0,1,2,3 */
static int a52_resample_2F_2R_to_4_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[4*i] = a52_f2s16 (_f, i);
	s16[4*i+1] = a52_f2s16 (_f, i+256);
	s16[4*i+2] = a52_f2s16 (_f, i+512);
	s16[4*i+3] = a52_f2s16 (_f, i+768);
    }
    return 4*256;
}

/* input planes 0,2,3,4,1 -> output slots 0,1,2,3,4 */
static int a52_resample_3F_2R_to_5_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[5*i] = a52_f2s16 (_f, i);
	s16[5*i+1] = a52_f2s16 (_f, i+512);
	s16[5*i+2] = a52_f2s16 (_f, i+768);
	s16[5*i+3] = a52_f2s16 (_f, i+1024);
	s16[5*i+4] = a52_f2s16 (_f, i+256);
    }
    return 5*256;
}

/* input planes 1,0 -> output slots 4,5; slots 0-3 silent */
static int a52_resample_MONO_LFE_to_6_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0;
	s16[6*i+4] = a52_f2s16 (_f, i+256);
	s16[6*i+5] = a52_f2s16 (_f, i);
    }
    return 6*256;
}

/* input planes 1,2,0 -> output slots 0,1,5; slots 2-4 silent */
static int a52_resample_STEREO_LFE_to_6_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[6*i] = a52_f2s16 (_f, i+256);
	s16[6*i+1] = a52_f2s16 (_f, i+512);
	s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0;
	s16[6*i+5] = a52_f2s16 (_f, i);
    }
    return 6*256;
}

/* input planes 1,3,2,0 -> output slots 0,1,4,5; slots 2,3 silent */
static int a52_resample_3F_LFE_to_6_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[6*i] = a52_f2s16 (_f, i+256);
	s16[6*i+1] = a52_f2s16 (_f, i+768);
	s16[6*i+2] = s16[6*i+3] = 0;
	s16[6*i+4] = a52_f2s16 (_f, i+512);
	s16[6*i+5] = a52_f2s16 (_f, i);
    }
    return 6*256;
}

/* input planes 1,2,3,4,0 -> output slots 0,1,2,3,5; slot 4 silent */
static int a52_resample_2F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[6*i] = a52_f2s16 (_f, i+256);
	s16[6*i+1] = a52_f2s16 (_f, i+512);
	s16[6*i+2] = a52_f2s16 (_f, i+768);
	s16[6*i+3] = a52_f2s16 (_f, i+1024);
	s16[6*i+4] = 0;
	s16[6*i+5] = a52_f2s16 (_f, i);
    }
    return 6*256;
}

/* input planes 1,3,4,5,2,0 -> output slots 0,1,2,3,4,5 */
static int a52_resample_3F_2R_LFE_to_6_C(float * _f, int16_t * s16){
    int i;
    for (i = 0; i < 256; i++) {
	s16[6*i] = a52_f2s16 (_f, i+256);
	s16[6*i+1] = a52_f2s16 (_f, i+768);
	s16[6*i+2] = a52_f2s16 (_f, i+1024);
	s16[6*i+3] = a52_f2s16 (_f, i+1280);
	s16[6*i+4] = a52_f2s16 (_f, i+512);
	s16[6*i+5] = a52_f2s16 (_f, i);
    }
    return 6*256;
}
/* optimization TODO / NOTES
    movntq is slightly faster (0.5% with the current test.c benchmark)
	(but that's just test.c, so it needs to be tested in reality)
	and it would mean (C / MMX2 / MMX / 3DNOW) versions
*/
/*
 * MMX converter: two planar input channels -> interleaved 16-bit stereo.
 *
 * Reads 256 32-bit words from each of two consecutive input planes
 * (the second plane starts 1024 bytes after the first), subtracts the
 * magicF2W constant from every word, packs the results to signed 16 bit
 * with saturation and stores them interleaved (plane 0, plane 1, ...).
 * Returns the number of int16_t values written (2*256).
 *
 * The asm addresses through %esi, so it is written for 32-bit x86.
 * The MMX registers it uses are not listed as clobbers; "emms" is
 * issued before leaving the asm block.
 */
static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
    int32_t * f = (int32_t *) _f;
/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
#ifdef HAVE_SSE
	asm volatile(
		"movl $-1024, %%esi \n\t"
		"1: \n\t"
		"cvtps2pi (%1, %%esi), %%mm0 \n\t"
		"cvtps2pi 1024(%1, %%esi), %%mm2\n\t"
		"movq %%mm0, %%mm1 \n\t"
		"punpcklwd %%mm2, %%mm0 \n\t"
		"punpckhwd %%mm2, %%mm1 \n\t"
		"movq %%mm0, (%0, %%esi) \n\t"
		"movq %%mm1, 8(%0, %%esi) \n\t"
		"addl $16, %%esi \n\t"
		" jnz 1b \n\t"
		"emms \n\t"
		:: "r" (s16+512), "r" (f+256)
		:"%esi", "memory"
	);*/
	asm volatile(
		// esi is a negative byte offset counting up to 0; both base
		// operands below point 1024 bytes past the start of their data
		"movl $-1024, %%esi \n\t"
		"movq "MANGLE(magicF2W)", %%mm7 \n\t"
		"1: \n\t"
		// four words of plane 0 (mm0, mm1) and of plane 1 (mm2, mm3)
		"movq (%1, %%esi), %%mm0 \n\t"
		"movq 8(%1, %%esi), %%mm1 \n\t"
		"movq 1024(%1, %%esi), %%mm2 \n\t"
		"movq 1032(%1, %%esi), %%mm3 \n\t"
		// remove the bias from every 32-bit word
		"psubd %%mm7, %%mm0 \n\t"
		"psubd %%mm7, %%mm1 \n\t"
		"psubd %%mm7, %%mm2 \n\t"
		"psubd %%mm7, %%mm3 \n\t"
		// saturating pack: mm0 = 4 samples of plane 0, mm2 = 4 of plane 1
		"packssdw %%mm1, %%mm0 \n\t"
		"packssdw %%mm3, %%mm2 \n\t"
		// interleave the two planes word by word
		"movq %%mm0, %%mm1 \n\t"
		"punpcklwd %%mm2, %%mm0 \n\t"
		"punpckhwd %%mm2, %%mm1 \n\t"
		// 16 output bytes = 4 stereo frames per iteration
		"movq %%mm0, (%0, %%esi) \n\t"
		"movq %%mm1, 8(%0, %%esi) \n\t"
		"addl $16, %%esi \n\t"
		" jnz 1b \n\t"
		"emms \n\t"
		:: "r" (s16+512), "r" (f+256)
		:"%esi", "memory"
	);
    return 2*256;
}
/*
 * MMX converter: four planar input channels -> interleaved 16-bit,
 * four channels per frame.
 *
 * Reads 256 32-bit words from each of four consecutive input planes
 * (1024 bytes apart), subtracts the magicF2W constant from every word,
 * packs to signed 16 bit with saturation and stores the samples
 * interleaved in plane order 0,1,2,3.
 * Returns the number of int16_t values written (4*256).
 *
 * The asm addresses through %esi, so it is written for 32-bit x86.
 * The MMX registers it uses are not listed as clobbers; "emms" is
 * issued before leaving the asm block.
 */
static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
    int32_t * f = (int32_t *) _f;
	asm volatile(
		// esi is a negative byte offset into the input, counting up
		// to 0; the output is addressed with esi*2 because every
		// 16 input bytes per plane produce 32 output bytes
		"movl $-1024, %%esi \n\t"
		"movq "MANGLE(magicF2W)", %%mm7 \n\t"
		"1: \n\t"
		// planes 0 and 1: load, remove bias, pack to 16 bit
		"movq (%1, %%esi), %%mm0 \n\t"
		"movq 8(%1, %%esi), %%mm1 \n\t"
		"movq 1024(%1, %%esi), %%mm2 \n\t"
		"movq 1032(%1, %%esi), %%mm3 \n\t"
		"psubd %%mm7, %%mm0 \n\t"
		"psubd %%mm7, %%mm1 \n\t"
		"psubd %%mm7, %%mm2 \n\t"
		"psubd %%mm7, %%mm3 \n\t"
		"packssdw %%mm1, %%mm0 \n\t"
		"packssdw %%mm3, %%mm2 \n\t"
		// planes 2 and 3: load, remove bias, pack to 16 bit
		"movq 2048(%1, %%esi), %%mm3 \n\t"
		"movq 2056(%1, %%esi), %%mm4 \n\t"
		"movq 3072(%1, %%esi), %%mm5 \n\t"
		"movq 3080(%1, %%esi), %%mm6 \n\t"
		"psubd %%mm7, %%mm3 \n\t"
		"psubd %%mm7, %%mm4 \n\t"
		"psubd %%mm7, %%mm5 \n\t"
		"psubd %%mm7, %%mm6 \n\t"
		"packssdw %%mm4, %%mm3 \n\t"
		"packssdw %%mm6, %%mm5 \n\t"
		// word interleave: planes 0/1 into mm0,mm1 and planes 2/3
		// into mm3,mm4
		"movq %%mm0, %%mm1 \n\t"
		"movq %%mm3, %%mm4 \n\t"
		"punpcklwd %%mm2, %%mm0 \n\t"
		"punpckhwd %%mm2, %%mm1 \n\t"
		"punpcklwd %%mm5, %%mm3 \n\t"
		"punpckhwd %%mm5, %%mm4 \n\t"
		// dword interleave: combine the 0/1 pairs with the 2/3 pairs
		// so each quadword holds one complete 4-channel frame
		"movq %%mm0, %%mm2 \n\t"
		"movq %%mm1, %%mm5 \n\t"
		"punpckldq %%mm3, %%mm0 \n\t"
		"punpckhdq %%mm3, %%mm2 \n\t"
		"punpckldq %%mm4, %%mm1 \n\t"
		"punpckhdq %%mm4, %%mm5 \n\t"
		// four frames (32 bytes) per iteration
		"movq %%mm0, (%0, %%esi,2) \n\t"
		"movq %%mm2, 8(%0, %%esi,2) \n\t"
		"movq %%mm1, 16(%0, %%esi,2) \n\t"
		"movq %%mm5, 24(%0, %%esi,2) \n\t"
		"addl $16, %%esi \n\t"
		" jnz 1b \n\t"
		"emms \n\t"
		:: "r" (s16+1024), "r" (f+256)
		:"%esi", "memory"
	);
    return 4*256;
}
%%mm1\n\t" + "movd 2060(%1, %%esi), %%mm2 \n\t" + "punpckldq 3084(%1, %%esi), %%mm2\n\t" + "movd 4108(%1, %%esi), %%mm3 \n\t" + "punpckldq 1036(%1, %%esi), %%mm3\n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" + "movq %%mm0, 24(%0, %%edi) \n\t" + "movq %%mm2, 32(%0, %%edi) \n\t" + + "addl $16, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1280), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 5*256; +} + +static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( + "movl $-1024, %%esi \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq 1024(%1, %%esi), %%mm0 \n\t" + "movq 1032(%1, %%esi), %%mm1 \n\t" + "movq (%1, %%esi), %%mm2 \n\t" + "movq 8(%1, %%esi), %%mm3 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "packssdw %%mm1, %%mm0 \n\t" + "packssdw %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "punpcklwd %%mm2, %%mm0 \n\t" + "punpckhwd %%mm2, %%mm1 \n\t" + "leal (%%esi, %%esi, 2), %%edi \n\t" + "movq %%mm6, (%0, %%edi) \n\t" + "movd %%mm0, 8(%0, %%edi) \n\t" + "punpckhdq %%mm0, %%mm0 \n\t" + "movq %%mm6, 12(%0, %%edi) \n\t" + "movd %%mm0, 20(%0, %%edi) \n\t" + "movq %%mm6, 24(%0, %%edi) \n\t" + "movd %%mm1, 32(%0, %%edi) \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movq %%mm6, 36(%0, %%edi) \n\t" + "movd %%mm1, 44(%0, %%edi) \n\t" + "addl $16, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 6*256; +} + +static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( + "movl $-1024, %%esi \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq 1024(%1, %%esi), %%mm0 \n\t" + "movq 2048(%1, 
%%esi), %%mm1 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm5 \n\t" + "leal (%%esi, %%esi, 2), %%edi \n\t" + + "pxor %%mm4, %%mm4 \n\t" + "packssdw %%mm5, %%mm0 \n\t" // FfAa + "packssdw %%mm4, %%mm1 \n\t" // 00Bb + "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 + "punpcklwd %%mm1, %%mm0 \n\t" // BAba + "movq %%mm0, %%mm1 \n\t" // BAba + "punpckldq %%mm4, %%mm3 \n\t" // f0XX + "punpckldq %%mm6, %%mm0 \n\t" // 00ba + "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 + + "movq %%mm0, (%0, %%edi) \n\t" // 00ba + "punpckhdq %%mm4, %%mm0 \n\t" // F000 + "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 + "movq %%mm0, 16(%0, %%edi) \n\t" // F000 + "addl $8, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 6*256; +} + +static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( + "movl $-1024, %%esi \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" + "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq 1024(%1, %%esi), %%mm0 \n\t" + "movq 3072(%1, %%esi), %%mm1 \n\t" + "movq 2048(%1, %%esi), %%mm4 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm4 \n\t" + "psubd %%mm7, %%mm5 \n\t" + "leal (%%esi, %%esi, 2), %%edi \n\t" + + "packssdw %%mm4, %%mm0 \n\t" // EeAa + "packssdw %%mm5, %%mm1 \n\t" // FfBb + "movq %%mm0, %%mm2 \n\t" // EeAa + "punpcklwd %%mm1, %%mm0 \n\t" // BAba + "punpckhwd %%mm1, %%mm2 \n\t" // FEfe + "movq %%mm0, %%mm1 \n\t" // BAba + "punpckldq %%mm6, %%mm0 \n\t" // 00ba + "punpckhdq %%mm1, %%mm1 \n\t" // BABA + + "movq %%mm0, (%0, %%edi) \n\t" + "punpckhdq %%mm2, %%mm0 \n\t" // FE00 + "punpckldq %%mm1, %%mm2 \n\t" // BAfe + "movq %%mm2, 8(%0, %%edi) \n\t" + "movq %%mm0, 16(%0, %%edi) \n\t" + "addl $8, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 6*256; +} + +static int 
a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( + "movl $-1024, %%esi \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" +// "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq 1024(%1, %%esi), %%mm0 \n\t" + "movq 2048(%1, %%esi), %%mm1 \n\t" + "movq 3072(%1, %%esi), %%mm2 \n\t" + "movq 4096(%1, %%esi), %%mm3 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "psubd %%mm7, %%mm5 \n\t" + "leal (%%esi, %%esi, 2), %%edi \n\t" + + "packssdw %%mm2, %%mm0 \n\t" // CcAa + "packssdw %%mm3, %%mm1 \n\t" // DdBb + "packssdw %%mm5, %%mm5 \n\t" // FfFf + "movq %%mm0, %%mm2 \n\t" // CcAa + "punpcklwd %%mm1, %%mm0 \n\t" // BAba + "punpckhwd %%mm1, %%mm2 \n\t" // DCdc + "pxor %%mm4, %%mm4 \n\t" // 0000 + "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 + "movq %%mm0, %%mm1 \n\t" // BAba + "movq %%mm4, %%mm3 \n\t" // F0f0 + "punpckldq %%mm2, %%mm0 \n\t" // dcba + "punpckhdq %%mm1, %%mm1 \n\t" // BABA + "punpckldq %%mm1, %%mm4 \n\t" // BAf0 + "punpckhdq %%mm3, %%mm2 \n\t" // F0DC + + "movq %%mm0, (%0, %%edi) \n\t" + "movq %%mm4, 8(%0, %%edi) \n\t" + "movq %%mm2, 16(%0, %%edi) \n\t" + "addl $8, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 6*256; +} + +static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ + int32_t * f = (int32_t *) _f; + asm volatile( + "movl $-1024, %%esi \n\t" + "movq "MANGLE(magicF2W)", %%mm7 \n\t" +// "pxor %%mm6, %%mm6 \n\t" + "1: \n\t" + "movq 1024(%1, %%esi), %%mm0 \n\t" + "movq 3072(%1, %%esi), %%mm1 \n\t" + "movq 4096(%1, %%esi), %%mm2 \n\t" + "movq 5120(%1, %%esi), %%mm3 \n\t" + "movq 2048(%1, %%esi), %%mm4 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" + "psubd %%mm7, %%mm0 \n\t" + "psubd %%mm7, %%mm1 \n\t" + "psubd %%mm7, %%mm2 \n\t" + "psubd %%mm7, %%mm3 \n\t" + "psubd %%mm7, %%mm4 \n\t" + "psubd %%mm7, %%mm5 \n\t" + "leal (%%esi, %%esi, 
2), %%edi \n\t" + + "packssdw %%mm2, %%mm0 \n\t" // CcAa + "packssdw %%mm3, %%mm1 \n\t" // DdBb + "packssdw %%mm4, %%mm4 \n\t" // EeEe + "packssdw %%mm5, %%mm5 \n\t" // FfFf + "movq %%mm0, %%mm2 \n\t" // CcAa + "punpcklwd %%mm1, %%mm0 \n\t" // BAba + "punpckhwd %%mm1, %%mm2 \n\t" // DCdc + "punpcklwd %%mm5, %%mm4 \n\t" // FEfe + "movq %%mm0, %%mm1 \n\t" // BAba + "movq %%mm4, %%mm3 \n\t" // FEfe + "punpckldq %%mm2, %%mm0 \n\t" // dcba + "punpckhdq %%mm1, %%mm1 \n\t" // BABA + "punpckldq %%mm1, %%mm4 \n\t" // BAfe + "punpckhdq %%mm3, %%mm2 \n\t" // FEDC + + "movq %%mm0, (%0, %%edi) \n\t" + "movq %%mm4, 8(%0, %%edi) \n\t" + "movq %%mm2, 16(%0, %%edi) \n\t" + "addl $8, %%esi \n\t" + " jnz 1b \n\t" + "emms \n\t" + :: "r" (s16+1536), "r" (f+256) + :"%esi", "%edi", "memory" + ); + return 6*256; +} + + +static void* a52_resample_MMX(int flags, int ch){ + switch (flags) { + case A52_MONO: + if(ch==5) return a52_resample_MONO_to_5_MMX; + break; + case A52_CHANNEL: + case A52_STEREO: + case A52_DOLBY: + if(ch==2) return a52_resample_STEREO_to_2_MMX; + break; + case A52_3F: + if(ch==5) return a52_resample_3F_to_5_MMX; + break; + case A52_2F2R: + if(ch==4) return a52_resample_2F_2R_to_4_MMX; + break; + case A52_3F2R: + if(ch==5) return a52_resample_3F_2R_to_5_MMX; + break; + case A52_MONO | A52_LFE: + if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; + break; + case A52_CHANNEL | A52_LFE: + case A52_STEREO | A52_LFE: + case A52_DOLBY | A52_LFE: + if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; + break; + case A52_3F | A52_LFE: + if(ch==6) return a52_resample_3F_LFE_to_6_MMX; + break; + case A52_2F2R | A52_LFE: + if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; + break; + case A52_3F2R | A52_LFE: + if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; + break; + } + return NULL; +} + + diff --git a/mpeg4/src/libavcodec/liba52/tables.h b/mpeg4/src/libavcodec/liba52/tables.h new file mode 100644 index 
0000000000000000000000000000000000000000..7f921c9d03ce6b460402ba4211f08419affe59a3 --- /dev/null +++ b/mpeg4/src/libavcodec/liba52/tables.h @@ -0,0 +1,246 @@ +/* + * tables.h + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of a52dec, a free ATSC A-52 stream decoder. + * See http://liba52.sourceforge.net/ for updates. + * + * a52dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * a52dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +static const int8_t exp_1[128] = { + -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 25,25,25 +}; +static const int8_t exp_2[128] = { + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 25,25,25 +}; +static 
const int8_t exp_3[128] = { + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + 25,25,25 +}; + +#define Q(x) ROUND (32768.0 * x) + +#define Q0 Q (-2/3) +#define Q1 Q (0) +#define Q2 Q (2/3) + +static const quantizer_t q_1_0[32] = { + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + 0, 0, 0, 0, 0 +}; + +static const quantizer_t q_1_1[32] = { + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + 0, 0, 0, 0, 0 +}; + +static const quantizer_t q_1_2[32] = { + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + 0, 0, 0, 0, 0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 + +#define Q0 Q (-4/5) +#define Q1 Q (-2/5) +#define Q2 Q (0) +#define Q3 Q (2/5) +#define Q4 Q (4/5) + +static const quantizer_t q_2_0[128] = { + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, + 0,0,0 +}; + +static const quantizer_t q_2_1[128] = { + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + 
Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + 0,0,0 +}; + +static const quantizer_t q_2_2[128] = { + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + 0,0,0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 + +static const quantizer_t q_3[8] = { + Q (-6/7), Q (-4/7), Q (-2/7), Q (0), Q (2/7), Q (4/7), Q (6/7), 0 +}; + +#define Q0 Q (-10/11) +#define Q1 Q (-8/11) +#define Q2 Q (-6/11) +#define Q3 Q (-4/11) +#define Q4 Q (-2/11) +#define Q5 Q (0) +#define Q6 Q (2/11) +#define Q7 Q (4/11) +#define Q8 Q (6/11) +#define Q9 Q (8/11) +#define QA Q (10/11) + +static const quantizer_t q_4_0[128] = { + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, + Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, + Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, + Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, + Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, + Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, + Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, + QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, + 0, 0, 0, 0, 0, 0, 0 +}; + +static const quantizer_t q_4_1[128] = { + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, 
Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + 0, 0, 0, 0, 0, 0, 0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 +#undef Q5 +#undef Q6 +#undef Q7 +#undef Q8 +#undef Q9 +#undef QA + +static const quantizer_t q_5[16] = { + Q (-14/15), Q (-12/15), Q (-10/15), Q (-8/15), Q (-6/15), + Q (-4/15), Q (-2/15), Q (0), Q (2/15), Q (4/15), + Q (6/15), Q (8/15), Q (10/15), Q (12/15), Q (14/15), 0 +}; + +#ifndef LIBA52_FIXED +static const sample_t scale_factor[25] = { + 0.000030517578125, + 0.0000152587890625, + 0.00000762939453125, + 0.000003814697265625, + 0.0000019073486328125, + 0.00000095367431640625, + 0.000000476837158203125, + 0.0000002384185791015625, + 0.00000011920928955078125, + 0.000000059604644775390625, + 0.0000000298023223876953125, + 0.00000001490116119384765625, + 0.000000007450580596923828125, + 0.0000000037252902984619140625, + 0.00000000186264514923095703125, + 0.000000000931322574615478515625, + 0.0000000004656612873077392578125, + 0.00000000023283064365386962890625, + 0.000000000116415321826934814453125, + 0.0000000000582076609134674072265625, + 0.00000000002910383045673370361328125, + 0.000000000014551915228366851806640625, + 0.0000000000072759576141834259033203125, + 0.00000000000363797880709171295166015625, + 0.000000000001818989403545856475830078125 +}; +#endif + +static const uint16_t dither_lut[256] = { + 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, + 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, + 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198, + 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176, + 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf, + 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321, + 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202, + 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec, + 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761, + 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 
0x27ad, 0x87bc, 0xc79e, 0x678f, + 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac, + 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642, + 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb, + 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415, + 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536, + 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8, + 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c, + 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2, + 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1, + 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f, + 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6, + 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58, + 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b, + 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95, + 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918, + 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6, + 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5, + 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b, + 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82, + 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c, + 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f, + 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1 +}; diff --git a/mpeg4/src/libavcodec/libgsm.c b/mpeg4/src/libavcodec/libgsm.c new file mode 100644 index 0000000000000000000000000000000000000000..731161e88aef00ad8f681d86630da1c748bae4b2 --- /dev/null +++ b/mpeg4/src/libavcodec/libgsm.c @@ -0,0 +1,95 @@ +/* + * Interface to libgsm for gsm encoding/decoding + * Copyright (c) 2005 Alban Bedel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free 
Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file libgsm.c + * Interface to libgsm for gsm encoding/decoding + */ + +#include "avcodec.h" +#include + +// gsm.h miss some essential constants +#define GSM_BLOCK_SIZE 33 +#define GSM_FRAME_SIZE 160 + +static int libgsm_init(AVCodecContext *avctx) { + if (avctx->channels > 1 || avctx->sample_rate != 8000) + return -1; + + avctx->frame_size = GSM_FRAME_SIZE; + avctx->block_align = GSM_BLOCK_SIZE; + + avctx->priv_data = gsm_create(); + + avctx->coded_frame= avcodec_alloc_frame(); + avctx->coded_frame->key_frame= 1; + + return 0; +} + +static int libgsm_close(AVCodecContext *avctx) { + gsm_destroy(avctx->priv_data); + avctx->priv_data = NULL; + return 0; +} + +static int libgsm_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) { + // we need a full block + if(buf_size < GSM_BLOCK_SIZE) return 0; + + gsm_encode(avctx->priv_data,data,frame); + + return GSM_BLOCK_SIZE; +} + + +AVCodec libgsm_encoder = { + "gsm", + CODEC_TYPE_AUDIO, + CODEC_ID_GSM, + 0, + libgsm_init, + libgsm_encode_frame, + libgsm_close, +}; + +static int libgsm_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) { + + if(buf_size < GSM_BLOCK_SIZE) return 0; + + if(gsm_decode(avctx->priv_data,buf,data)) return -1; + + *data_size = GSM_FRAME_SIZE*2; + return GSM_BLOCK_SIZE; +} + +AVCodec libgsm_decoder = { + "gsm", + CODEC_TYPE_AUDIO, + 
CODEC_ID_GSM, + 0, + libgsm_init, + NULL, + libgsm_close, + libgsm_decode_frame, +}; diff --git a/mpeg4/src/libavcodec/libpostproc/Makefile b/mpeg4/src/libavcodec/libpostproc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..00e4033ceaee49d245d5e933b55f687b53c48894 --- /dev/null +++ b/mpeg4/src/libavcodec/libpostproc/Makefile @@ -0,0 +1,28 @@ + +include ../../config.mak + +# Overload incdir, postproc include files go in a different directory. +incdir=$(prefix)/include/postproc + +NAME=postproc +SUBDIR=libavcodec/libpostproc +ifeq ($(BUILD_SHARED),yes) +LIBVERSION=$(SPPVERSION) +LIBMAJOR=$(SPPMAJOR) +endif + +STATIC_OBJS=postprocess.o +SHARED_OBJS=postprocess_pic.o + +HEADERS = postprocess.h + +CFLAGS = $(OPTFLAGS) $(MLIB_INC) -I. -I.. -I$(SRC_PATH)/libavcodec -I../.. $(EXTRA_INC) +# -I/usr/X11R6/include/ + +include $(SRC_PATH)/common.mak + +ifeq ($(BUILD_SHARED),yes) +postprocess_pic.o: postprocess.c + $(CC) -c $(CFLAGS) -fomit-frame-pointer -fPIC -DPIC -I.. -I../.. -o $@ $< +endif + diff --git a/mpeg4/src/libavcodec/libpostproc/mangle.h b/mpeg4/src/libavcodec/libpostproc/mangle.h new file mode 100644 index 0000000000000000000000000000000000000000..46480ab4379c1eb8ea6635b5f308106e9cce43a3 --- /dev/null +++ b/mpeg4/src/libavcodec/libpostproc/mangle.h @@ -0,0 +1,30 @@ +/* mangle.h - This file has some CPP macros to deal with different symbol + * mangling across binary formats. + * (c)2002 by Felix Buenemann + * File licensed under the GPL, see http://www.fsf.org/ for more info. + */ + +#ifndef __MANGLE_H +#define __MANGLE_H + +/* Feel free to add more to the list, eg. a.out IMO */ +/* Use rip-relative addressing if compiling PIC code on x86-64. 
*/ +#if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \ + (defined(__OpenBSD__) && !defined(__ELF__)) +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) "_" #a"(%%rip)" +#else +#define MANGLE(a) "_" #a +#endif +#else +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) #a"(%%rip)" +#elif defined(CONFIG_DARWIN) +#define MANGLE(a) "_" #a +#else +#define MANGLE(a) #a +#endif +#endif + +#endif /* !__MANGLE_H */ + diff --git a/mpeg4/src/libavcodec/libpostproc/postprocess.c b/mpeg4/src/libavcodec/libpostproc/postprocess.c new file mode 100644 index 0000000000000000000000000000000000000000..5574026e37fc761082fccb69e8ade21297bf54cc --- /dev/null +++ b/mpeg4/src/libavcodec/libpostproc/postprocess.c @@ -0,0 +1,1158 @@ +/* + Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at) + + AltiVec optimizations (C) 2004 Romain Dolbeau + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/** + * @file postprocess.c + * postprocessing. 
+ */ + +/* + C MMX MMX2 3DNow AltiVec +isVertDC Ec Ec Ec +isVertMinMaxOk Ec Ec Ec +doVertLowPass E e e Ec +doVertDefFilter Ec Ec e e Ec +isHorizDC Ec Ec Ec +isHorizMinMaxOk a E Ec +doHorizLowPass E e e Ec +doHorizDefFilter Ec Ec e e Ec +do_a_deblock Ec E Ec E +deRing E e e* Ecp +Vertical RKAlgo1 E a a +Horizontal RKAlgo1 a a +Vertical X1# a E E +Horizontal X1# a E E +LinIpolDeinterlace e E E* +CubicIpolDeinterlace a e e* +LinBlendDeinterlace e E E* +MedianDeinterlace# E Ec Ec +TempDeNoiser# E e e Ec + +* i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work +# more or less selfinvented filters so the exactness isnt too meaningfull +E = Exact implementation +e = allmost exact implementation (slightly different rounding,...) +a = alternative / approximate impl +c = checked against the other implementations (-vo md5) +p = partially optimized, still some work to do +*/ + +/* +TODO: +reduce the time wasted on the mem transfer +unroll stuff if instructions depend too much on the prior one +move YScale thing to the end instead of fixing QP +write a faster and higher quality deblocking filter :) +make the mainloop more flexible (variable number of blocks at once + (the if/else stuff per block is slowing things down) +compare the quality & speed of all filters +split this huge file +optimize c versions +try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks +... +*/ + +//Changelog: use the CVS log + +#include "config.h" +#include +#include +#include +#include +#ifdef HAVE_MALLOC_H +#include +#endif +//#undef HAVE_MMX2 +//#define HAVE_3DNOW +//#undef HAVE_MMX +//#undef ARCH_X86 +//#define DEBUG_BRIGHTNESS +#ifdef USE_FASTMEMCPY +#include "fastmemcpy.h" +#endif +#include "postprocess.h" +#include "postprocess_internal.h" + +#include "mangle.h" //FIXME should be supressed + +#ifdef HAVE_ALTIVEC_H +#include +#endif + +#ifndef HAVE_MEMALIGN +#define memalign(a,b) malloc(b) +#endif + +#define MIN(a,b) ((a) > (b) ? 
(b) : (a)) +#define MAX(a,b) ((a) < (b) ? (b) : (a)) +#define ABS(a) ((a) > 0 ? (a) : (-(a))) +#define SIGN(a) ((a) > 0 ? 1 : -1) + +#define GET_MODE_BUFFER_SIZE 500 +#define OPTIONS_ARRAY_SIZE 10 +#define BLOCK_SIZE 8 +#define TEMP_STRIDE 8 +//#define NUM_BLOCKS_AT_ONCE 16 //not used yet + +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_used __attribute__((used)) +# define always_inline __attribute__((always_inline)) inline +#else +# define attribute_used +# define always_inline inline +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; +static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; +static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; +static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; +static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; +static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; +static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; +#endif + +static uint8_t clip_table[3*256]; +static uint8_t * const clip_tab= clip_table + 256; + +static const int verbose= 0; + +static const int attribute_used deringThreshold= 20; + + +static struct PPFilter filters[]= +{ + {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, + {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, +/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER}, + {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ + {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, + {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, + {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, + {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, + {"dr", "dering", 1, 5, 6, DERING}, + {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, + {"lb", "linblenddeint", 1, 
1, 4, LINEAR_BLEND_DEINT_FILTER}, + {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, + {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, + {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, + {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, + {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, + {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, + {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, + {NULL, NULL,0,0,0,0} //End Marker +}; + +static char *replaceTable[]= +{ + "default", "hdeblock:a,vdeblock:a,dering:a", + "de", "hdeblock:a,vdeblock:a,dering:a", + "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", + "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", + "ac", "ha:a:128:7,va:a,dering:a", + NULL //End Marker +}; + + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +static inline void prefetchnta(void *p) +{ + asm volatile( "prefetchnta (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht0(void *p) +{ + asm volatile( "prefetcht0 (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht1(void *p) +{ + asm volatile( "prefetcht1 (%0)\n\t" + : : "r" (p) + ); +} + +static inline void prefetcht2(void *p) +{ + asm volatile( "prefetcht2 (%0)\n\t" + : : "r" (p) + ); +} +#endif + +// The horizontal Functions exist only in C cuz the MMX code is faster with vertical filters and transposing + +/** + * Check if the given 8x8 Block is mostly "flat" + */ +static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) +{ + int numEq= 0; + int y; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; + + for(y=0; y c->ppMode.flatnessThreshold; +} + +/** + * Check if the middle 8x8 Block in the given 8x16 block is flat + */ +static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ + int numEq= 0; + int y; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; + + src+= stride*4; // src points to begin of the 8x8 Block + for(y=0; y 
c->ppMode.flatnessThreshold; +} + +static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) +{ + int i; +#if 1 + for(i=0; i<2; i++){ + if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0; + src += stride; + if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0; + src += stride; + if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0; + src += stride; + if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0; + src += stride; + } +#else + for(i=0; i<8; i++){ + if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0; + src += stride; + } +#endif + return 1; +} + +static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) +{ +#if 1 +#if 1 + int x; + src+= stride*4; + for(x=0; x 4*QP) return 0; + if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; + if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; + if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; + } +#else + int x; + src+= stride*3; + for(x=0; x 4*QP) return 0; + } +#endif + return 1; +#else + int x; + src+= stride*4; + for(x=0; xmax) max=v; + if(v 2*QP) return 0; + } + return 1; +#endif +} + +static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ + if( isHorizDC_C(src, stride, c) ){ + if( isHorizMinMaxOk_C(src, stride, c->QP) ) + return 1; + else + return 0; + }else{ + return 2; + } +} + +static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ + if( isVertDC_C(src, stride, c) ){ + if( isVertMinMaxOk_C(src, stride, c->QP) ) + return 1; + else + return 0; + }else{ + return 2; + } +} + +static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) +{ + int y; + for(y=0; yQP) + { + const int q=(dst[3] - dst[4])/2; + const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); + const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); + + int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); + d= MAX(d, 0); + + d= 
(5*d + 32) >> 6; + d*= SIGN(-middleEnergy); + + if(q>0) + { + d= d<0 ? 0 : d; + d= d>q ? q : d; + } + else + { + d= d>0 ? 0 : d; + d= dQP ? dst[-1] : dst[0]; + const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; + + int sums[10]; + sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; + sums[1] = sums[0] - first + dst[3]; + sums[2] = sums[1] - first + dst[4]; + sums[3] = sums[2] - first + dst[5]; + sums[4] = sums[3] - first + dst[6]; + sums[5] = sums[4] - dst[0] + dst[7]; + sums[6] = sums[5] - dst[1] + last; + sums[7] = sums[6] - dst[2] + last; + sums[8] = sums[7] - dst[3] + last; + sums[9] = sums[8] - dst[4] + last; + + dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; + dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; + dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; + dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; + dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; + dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; + dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; + dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; + + dst+= stride; + } +} + +/** + * Experimental Filter 1 (Horizontal) + * will not damage linear gradients + * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter + * can only smooth blocks at the expected locations (it cant smooth them if they did move) + * MMX2 version does correct clipping C version doesnt + * not identical with the vertical one + */ +static inline void horizX1Filter(uint8_t *src, int stride, int QP) +{ + int y; + static uint64_t *lut= NULL; + if(lut==NULL) + { + int i; + lut= (uint64_t*)memalign(8, 256*8); + for(i=0; i<256; i++) + { + int v= i < 128 ? 
2*i : 2*(i-256); +/* +//Simulate 112242211 9-Tap filter + uint64_t a= (v/16) & 0xFF; + uint64_t b= (v/8) & 0xFF; + uint64_t c= (v/4) & 0xFF; + uint64_t d= (3*v/8) & 0xFF; +*/ +//Simulate piecewise linear interpolation + uint64_t a= (v/16) & 0xFF; + uint64_t b= (v*3/16) & 0xFF; + uint64_t c= (v*5/16) & 0xFF; + uint64_t d= (7*v/16) & 0xFF; + uint64_t A= (0x100 - a)&0xFF; + uint64_t B= (0x100 - b)&0xFF; + uint64_t C= (0x100 - c)&0xFF; + uint64_t D= (0x100 - c)&0xFF; + + lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | + (D<<24) | (C<<16) | (B<<8) | (A); + //lut[i] = (v<<32) | (v<<24); + } + } + + for(y=0; yQP; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; +//START_TIMER + src+= step*4; // src points to begin of the 8x8 Block + for(y=0; y<8; y++){ + int numEq= 0; + + if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; + if(numEq > c->ppMode.flatnessThreshold){ + int min, max, x; + + if(src[0] > src[step]){ + max= src[0]; + min= src[step]; + }else{ + max= src[step]; + min= src[0]; + } + for(x=2; x<8; x+=2){ + if(src[x*step] > src[(x+1)*step]){ + if(src[x *step] > max) max= src[ x *step]; + if(src[(x+1)*step] < min) min= src[(x+1)*step]; + }else{ + if(src[(x+1)*step] > max) max= src[(x+1)*step]; + if(src[ x *step] < min) min= src[ x *step]; + } + } + 
if(max-min < 2*QP){ + const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; + const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; + + int sums[10]; + sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; + sums[1] = sums[0] - first + src[3*step]; + sums[2] = sums[1] - first + src[4*step]; + sums[3] = sums[2] - first + src[5*step]; + sums[4] = sums[3] - first + src[6*step]; + sums[5] = sums[4] - src[0*step] + src[7*step]; + sums[6] = sums[5] - src[1*step] + last; + sums[7] = sums[6] - src[2*step] + last; + sums[8] = sums[7] - src[3*step] + last; + sums[9] = sums[8] - src[4*step] + last; + + src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; + src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; + src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; + src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; + src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; + src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; + src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; + src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; + } + }else{ + const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); + + if(ABS(middleEnergy) < 8*QP) + { + const int q=(src[3*step] - src[4*step])/2; + const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); + const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); + + int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); + d= MAX(d, 0); + + d= (5*d + 32) >> 6; + d*= SIGN(-middleEnergy); + + if(q>0) + { + d= d<0 ? 0 : d; + d= d>q ? q : d; + } + else + { + d= d>0 ? 
0 : d; + d= dppMode= *ppMode; //FIXME + + // useing ifs here as they are faster than function pointers allthough the + // difference wouldnt be messureable here but its much better because + // someone might exchange the cpu whithout restarting mplayer ;) +#ifdef RUNTIME_CPUDETECT +#if defined(ARCH_X86) || defined(ARCH_X86_64) + // ordered per speed fasterst first + if(c->cpuCaps & PP_CPU_CAPS_MMX2) + postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) + postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + else if(c->cpuCaps & PP_CPU_CAPS_MMX) + postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + else + postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#else +#ifdef ARCH_POWERPC +#ifdef HAVE_ALTIVEC + if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) + postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + else +#endif +#endif + postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#endif +#else //RUNTIME_CPUDETECT +#ifdef HAVE_MMX2 + postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#elif defined (HAVE_3DNOW) + postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#elif defined (HAVE_MMX) + postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#elif defined (HAVE_ALTIVEC) + postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#else + postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#endif +#endif //!RUNTIME_CPUDETECT +} + +//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, +// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode 
*ppMode); + +/* -pp Command line Help +*/ +char *pp_help= +"Available postprocessing filters:\n" +"Filters Options\n" +"short long name short long option Description\n" +"* * a autoq CPU power dependent enabler\n" +" c chrom chrominance filtering enabled\n" +" y nochrom chrominance filtering disabled\n" +" n noluma luma filtering disabled\n" +"hb hdeblock (2 threshold) horizontal deblocking filter\n" +" 1. difference factor: default=32, higher -> more deblocking\n" +" 2. flatness threshold: default=39, lower -> more deblocking\n" +" the h & v deblocking filters share these\n" +" so you can't set different thresholds for h / v\n" +"vb vdeblock (2 threshold) vertical deblocking filter\n" +"ha hadeblock (2 threshold) horizontal deblocking filter\n" +"va vadeblock (2 threshold) vertical deblocking filter\n" +"h1 x1hdeblock experimental h deblock filter 1\n" +"v1 x1vdeblock experimental v deblock filter 1\n" +"dr dering deringing filter\n" +"al autolevels automatic brightness / contrast\n" +" f fullyrange stretch luminance to (0..255)\n" +"lb linblenddeint linear blend deinterlacer\n" +"li linipoldeint linear interpolating deinterlace\n" +"ci cubicipoldeint cubic interpolating deinterlacer\n" +"md mediandeint median deinterlacer\n" +"fd ffmpegdeint ffmpeg deinterlacer\n" +"l5 lowpass5 FIR lowpass deinterlacer\n" +"de default hb:a,vb:a,dr:a\n" +"fa fast h1:a,v1:a,dr:a\n" +"ac ha:a:128:7,va:a,dr:a\n" +"tn tmpnoise (3 threshold) temporal noise reducer\n" +" 1. <= 2. <= 3. larger -> stronger filtering\n" +"fq forceQuant force quantizer\n" +"Usage:\n" +"[: