- edited description
1.8 or latest code crashes in SIMD code
Since upgrading to 1.8 or the latest code I can no longer encode anything with FFmpeg. 1.7 worked fine. Disabling the assembly code so it falls back to the C code paths works fine.
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x00000f34ed4aede9 in x265_pixel_sa8d_8x8_internal_avx2 () from /usr/local/lib/libx265.so.5.0
(gdb) bt full
#0 0x00000f34ed4aede9 in x265_pixel_sa8d_8x8_internal_avx2 () from /usr/local/lib/libx265.so.5.0
No symbol table info available.
#1 0x00000f34ed4aefff in x265_pixel_sa8d_16x16_internal_avx2 () from /usr/local/lib/libx265.so.5.0
No symbol table info available.
#2 0x00000f34ed4af0be in x265_pixel_sa8d_32x32_avx2 () from /usr/local/lib/libx265.so.5.0
No symbol table info available.
#3 0x00000f34ed421ac7 in x265::Search::estIntraPredQT (this=0xf34fdc9fbd0, intraMode=..., cuGeom=..., depthRange=0xf351ba5d6d0)
at /home/ports/pobj/x265-1.9/x265_1.9/source/encoder/search.cpp:1527
mode = 34
fenc = 0xf34b73b0400 '\020' <repeats 200 times>...
costShift = 2
stride = 64
mpms = 67108867
sad = 114688
scaleStride = 32
mpmModes = {0, 1, 26}
sa8d = 0xf34ed4af050 <x265_pixel_sa8d_32x32_avx2>
modeCosts = {114689, 114691, 114696 <repeats 24 times>, 114691, 114696, 114696, 114696, 114696, 114696, 114696, 114696, 0}
bits = 6
planar = 0xf34fdc9fc38 '\200' <repeats 200 times>...
paddedBcost = 0
intraNeighbors = {numIntraNeighbor = 0, totalUnits = 65, aboveUnits = 32, leftUnits = 32, unitWidth = 4, unitHeight = 4, log2TrSize = 6, bNeighborFlags = {
false <repeats 65 times>}}
scaleTuSize = 32
rbits = 6
candCostList = {16720821992704, 16719027000128, 15, 4294967295, 16716901651536, 16718909078553, 25769803775, 25769803781, 16720771536448, 16719996900323, 25769803777, 8589934597,
16720771536496, 16719996878099, 463853168, 6}
rdModeList = {0, 0, 0, 0, 2900807980, 3892, 2896359272, 3892, 266242, 0, 3623422084, 1074018641, 514309376, 3893, 270336, 0}
bcost = 114689
maxCandCount = 5
bmode = 0
icosts = {rdcost = 0, bits = 0, distortion = 0, energy = 0}
puIdx = 0
cu = @0xf34fdca62d8: {static s_partSet = {0xf34ed793e30 <bcast256(unsigned char*, unsigned char)>, 0xf34ed793e70 <bcast64(unsigned char*, unsigned char)>,
0xf34ed793f20 <bcast16(unsigned char*, unsigned char)>, 0xf34ed793f80 <bcast4(unsigned char*, unsigned char)>, 0xf34ed793fc0 <bcast1(unsigned char*, unsigned char)>},
static s_numPartInCUSize = 16, m_encData = 0xf3493c9e400, m_slice = 0xf351ac36670, m_partCopy = 0xf34ed793ff0 <copy256(unsigned char*, unsigned char*)>,
m_partSet = 0xf34ed793e30 <bcast256(unsigned char*, unsigned char)>, m_subPartCopy = 0xf34ed794030 <copy64(unsigned char*, unsigned char*)>,
m_subPartSet = 0xf34ed793e70 <bcast64(unsigned char*, unsigned char)>, m_cuAddr = 0, m_absIdxInCTU = 0, m_cuPelX = 0, m_cuPelY = 0, m_numPartitions = 256, m_chromaFormat = 1,
m_hChromaShift = 1, m_vChromaShift = 1, m_qp = 0xf34f71f2a00 '\017' <repeats 200 times>..., m_log2CUSize = 0xf34f71f2b00 '\006' <repeats 200 times>...,
m_lumaIntraDir = 0xf34f71f2c00 '\377' <repeats 200 times>..., m_tqBypass = 0xf34f71f2e00 "", m_refIdx = {0xf34f71f2f00 '\377' <repeats 200 times>...,
0xf34f71f3000 '\377' <repeats 200 times>...}, m_cuDepth = 0xf34f71f3100 "", m_predMode = 0xf34f71f3200 '\002' <repeats 200 times>..., m_partSize = 0xf34f71f3300 "",
m_mergeFlag = 0xf34f71f3400 "", m_interDir = 0xf34f71f3500 "", m_mvpIdx = {0xf34f71f3600 "", 0xf34f71f3700 ""}, m_tuDepth = 0xf34f71f3800 "", m_transformSkip = {
0xf34f71f3900 "", 0xf34f71f3a00 "", 0xf34f71f3b00 ""}, m_cbf = {0xf34f71f3c00 "", 0xf34f71f3d00 "", 0xf34f71f3e00 ""},
m_chromaIntraDir = 0xf34f71f2d00 '\377' <repeats 200 times>..., m_trCoeff = {0xf34f4a5e000, 0xf34f4a60000, 0xf34f4a60800}, m_mv = {0xf34fc8de000, 0xf34fc8de400}, m_mvd = {
0xf34fc8de800, 0xf34fc8dec00}, m_cuAboveLeft = 0x0, m_cuAboveRight = 0x0, m_cuAbove = 0x0, m_cuLeft = 0x0}
reconYuv = 0xf34fdca6460
predYuv = 0xf34fdca6430
fencYuv = 0xf34fdcabc80
depth = 0
initTuDepth = 0
numPU = 1
log2TrSize = 6
tuSize = 64
qNumParts = 64
sizeIdx = 3
absPartIdx = 0
totalDistortion = 0
checkTransformSkip = 0
#4 0x00000f34ed420c56 in x265::Search::checkIntra (this=0xf34fdc9fbd0, intraMode=..., cuGeom=..., partSize=x265::SIZE_2Nx2N)
at /home/ports/pobj/x265-1.9/x265_1.9/source/encoder/search.cpp:1171
cu = @0xf34fdca62d8: {static s_partSet = <same as static member of an already seen type>, static s_numPartInCUSize = 16, m_encData = 0xf3493c9e400, m_slice = 0xf351ac36670,
m_partCopy = 0xf34ed793ff0 <copy256(unsigned char*, unsigned char*)>, m_partSet = 0xf34ed793e30 <bcast256(unsigned char*, unsigned char)>,
m_subPartCopy = 0xf34ed794030 <copy64(unsigned char*, unsigned char*)>, m_subPartSet = 0xf34ed793e70 <bcast64(unsigned char*, unsigned char)>, m_cuAddr = 0, m_absIdxInCTU = 0,
m_cuPelX = 0, m_cuPelY = 0, m_numPartitions = 256, m_chromaFormat = 1, m_hChromaShift = 1, m_vChromaShift = 1, m_qp = 0xf34f71f2a00 '\017' <repeats 200 times>...,
m_log2CUSize = 0xf34f71f2b00 '\006' <repeats 200 times>..., m_lumaIntraDir = 0xf34f71f2c00 '\377' <repeats 200 times>..., m_tqBypass = 0xf34f71f2e00 "", m_refIdx = {
0xf34f71f2f00 '\377' <repeats 200 times>..., 0xf34f71f3000 '\377' <repeats 200 times>...}, m_cuDepth = 0xf34f71f3100 "",
m_predMode = 0xf34f71f3200 '\002' <repeats 200 times>..., m_partSize = 0xf34f71f3300 "", m_mergeFlag = 0xf34f71f3400 "", m_interDir = 0xf34f71f3500 "", m_mvpIdx = {
0xf34f71f3600 "", 0xf34f71f3700 ""}, m_tuDepth = 0xf34f71f3800 "", m_transformSkip = {0xf34f71f3900 "", 0xf34f71f3a00 "", 0xf34f71f3b00 ""}, m_cbf = {0xf34f71f3c00 "",
0xf34f71f3d00 "", 0xf34f71f3e00 ""}, m_chromaIntraDir = 0xf34f71f2d00 '\377' <repeats 200 times>..., m_trCoeff = {0xf34f4a5e000, 0xf34f4a60000, 0xf34f4a60800}, m_mv = {
0xf34fc8de000, 0xf34fc8de400}, m_mvd = {0xf34fc8de800, 0xf34fc8dec00}, m_cuAboveLeft = 0x0, m_cuAboveRight = 0x0, m_cuAbove = 0x0, m_cuLeft = 0x0}
tuDepthRange = {5, 5}
bCodeDQP = false
Comments (48)
-
reporter -
Very odd. You can disable the AVX code paths by using --asm
Let us know what you find. What's your ffmpeg build and encode command line?
-
reporter I don't mean disabling AVX via the x265 program but utilizing x265 via FFmpeg. Is there a way of disabling just AVX at build time (and leaving SSE enabled)?
-
reporter It looks like FFmpeg's -cpuflags doesn't make any difference with x265. I assume without looking at the code that x265 does CPU feature detection independently of FFmpeg.
-
Yes, x265 does detect CPU independently.
There's no way/no need to disable specific instruction sets at build time (unless you don't have yasm, in which case ENABLE_ASSEMBLY is turned off). I haven't tried this with ffmpeg, but I assume you could pass the asm flag as part of the x265 option list.
-
reporter I see crashing also with the x265 program. If I disable most of the SIMD code except for MMX and SSE, as in SSE2 or newer then it encodes fine. I also noticed the SIGILL with XOP.
$ x265 --asm "mmx2,sse,sse2" cats.y4m cats.hevc y4m [info]: 592x320 fps 25/1 i420p8 sar 1:1 frames 0 - 975 of 976 raw [info]: output file: cats.hevc x265 [info]: HEVC encoder version 1.8+43-04575a459a16 x265 [info]: build info [OpenBSD][clang 3.5.0][64 bit] 8bit x265 [info]: using cpu capabilities: MMX2 SSE2 x265 [info]: Main profile, Level-2.1 (Main tier) x265 [info]: Thread pool created using 4 threads x265 [info]: frame threads / pool features : 2 / wpp(5 rows) x265 [info]: Coding QT: max CU size, min CU size : 64 / 8 x265 [info]: Residual QT: max TU size, max depth : 32 / 1 inter / 1 intra x265 [info]: ME / range / subpel / merge : hex / 57 / 2 / 2 x265 [info]: Keyframe min / max / scenecut : 25 / 250 / 40 x265 [info]: Lookahead / bframes / badapt : 20 / 4 / 2 x265 [info]: b-pyramid / weightp / weightb : 1 / 1 / 0 x265 [info]: References / ref-limit cu / depth : 3 / 0 / 0 x265 [info]: AQ: mode / str / qg-size / cu-tree : 1 / 1.0 / 32 / 1 x265 [info]: Rate Control / qCompress : CRF-28.0 / 0.60 x265 [info]: tools: rd=3 psy-rd=0.30 signhide tmvp strong-intra-smoothing x265 [info]: tools: deblock sao Segmentation fault (core dumped)
Program terminated with signal SIGILL, Illegal instruction. #0 0x0000126665d385f1 in x265_frame_init_lowres_core_xop () from /usr/local/lib/libx265.so.5.0 Program terminated with signal SIGSEGV, Segmentation fault. #0 0x0000012e7faf68fc in x265_mbtree_propagate_cost_avx2 () from /usr/local/lib/libx265.so.5.0 Program terminated with signal SIGSEGV, Segmentation fault. #0 0x00001048c1a3a86c in x265_mbtree_propagate_cost_avx () from /usr/local/lib/libx265.so.5.0 Program terminated with signal SIGSEGV, Segmentation fault. #0 0x00000c13a0ecd7c3 in x265_mbtree_propagate_cost_sse2 () from /usr/local/lib/libx265.so.5.0
-
reporter - changed title to 1.8 or latest code crashes in SIMD code
- edited description
-
In the above report, it is a bug in my AVX version of mbtree_propagate_cost; I fixed it last Friday.
-
reporter No, this issue has not been fixed. I am using the latest code.
-
I sent a patch to fix a constant read overflow bug; could you try again?
btw: What's your platform, OS X or Linux?
-
reporter Where is this patch located? I could try it if I am pointed in the right direction.
Neither, OpenBSD.
-
You can get from here: https://patches.videolan.org/patch/10446/
to apply the patch:
hg import --no-commit https://patches.videolan.org/patch/10446/raw
-
reporter With that patch applied it is still crashing in the same function.
-
Account Deleted Can you provide your CPU information ? Also, can you share the command prompt output for "using cpu capabilities" without "--asm" option (default x265 detected cpu capabilities)?
-
reporter cpu0: Intel(R) Core(TM) i7-4600U CPU @ 2.10GHz, 1995.65 MHz cpu0: FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,LONG,LAHF,ABM,PERF,ITSC,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,SENSOR,ARAT x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX AVX2 FMA3 LZCNT BMI2
-
Your Core i7-4600U should support AVX2, according to ARK... http://ark.intel.com/products/76616/Intel-Core-i7-4600U-Processor-4M-Cache-up-to-3_30-GHz x265 thinks it should, but OpenBSD is reporting that AVX2 is not supported.
-
reporter No, OpenBSD is not reporting that AVX2 is not supported. It works fine and worked fine with 1.7. AVX2 code works fine with other programs / libraries such as FFmpeg, x264 and libvpx. The crashing also is not specific to AVX code as shown above.
-
Do you mean that 1.8 crashes only on AVX2? Could you tell us which instruction caused it? Thanks.
-
reporter From the post above where I am testing with the x265 program the crashing is not specific to the AVX2 code path. Crashing also happens with AVX and SSE2.
-
Oh... sorry, I missed the AVX2 in the long string above.
-
OS X 10.11: no crash with the x265 command line. Of course, I don't have your sequence; I used 720p50_parkrun_ter.y4m and city_4cif_60fps.y4m.
-
Account Deleted We are not able to reproduce this issue at our end. If possible, can you give access to your machine so we can check the problem ? We are trying to install OpenBSD 5.8 to further check this issue. Can you share the test-sequence you have used for encoding ?
-
reporter I'll get back to you on the weekend when I have time to do further testing on my test system and provide access to it. There is nothing special about the test sequence. It's just converting any input video via either FFmpeg or the x265 program to HEVC.
-
running avx2 code on avx CPU. "Feature" added in r11101.
=> 0x00007ffff57fc000 <x265_mbtree_propagate_cost_avx+16>: vbroadcasti128 0x324ef7(%rip),%ymm5 # 0x7ffff5b20f00
-
This is my tree merge problem, I sent a new patch to solve it.
-
reporter Testing on my test box (as opposed to my laptop) I also see the same crashing though with only SSE2 as this system doesn't have AVX / AVX2. So if anyone wants access to this system to play around with it let me know via e-mail.. brad at comstyle.com
$ egdb x265 x265.core GNU gdb (GDB) 7.10 Copyright (C) 2015 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "x86_64-unknown-openbsd5.8". Type "show configuration" for configuration details. For bug reporting instructions, please see: <http://www.gnu.org/software/gdb/bugs/>. Find the GDB manual and other documentation resources online at: <http://www.gnu.org/software/gdb/documentation/>. For help, type "help". Type "apropos word" to search for commands related to "word"... Reading symbols from x265...done. [New process 615] [New process 4367] [New process 29347] [New process 23420] [New process 21565] Core was generated by `x265'. Program terminated with signal SIGSEGV, Segmentation fault. #0 x265_mbtree_propagate_cost_sse2.loop () at /usr/ports/pobj/x265-1.9/x265_1.9/source/common/x86/mc-a2.asm:1008 1008 movh m2, [r2+r5*4] ; intra [Current thread is 1 (process 615)]
-
Could you double-check the C code? The crash is on this line; maybe the input 'len' has some problem. Could you show all registers and parameters? Thanks.
-
reporter Using --no-asm x265 is able to encode content just fine.
(gdb) info registers rax 0x128856e89620 20376782935584 rbx 0xafc 2812 rcx 0x1288882ecd7e 20377609620862 rdx 0x128859f94f6c 20376834363244 rsi 0x12886ab77000 20377115258880 rdi 0x128853f3a500 20376733328640 rbp 0x128808026d70 0x128808026d70 rsp 0x128808026bf8 0x128808026bf8 r8 0x128842146f6c 20376433487724 r9 0x24 36 r10 0x1288882ecd7e 20377609620862 r11 0x128842146f6c 20376433487724 r12 0x12888030dd00 20377475538176 r13 0x12888fe86650 20377739224656 r14 0x12880802e9d0 20375459260880 r15 0x25 37 rip 0x128856e89643 0x128856e89643 <x265_mbtree_propagate_cost_sse2.loop> eflags 0x10297 [ CF PF AF SF IF RF ] cs 0x2b 43 ss 0x23 35 ds 0x23 35 es 0x23 35 fs 0x23 35 gs 0x23 35 (gdb) bt full #0 x265_mbtree_propagate_cost_sse2.loop () at /home/ports/pobj/x265-1.9/x265_1.9/source/common/x86/mc-a2.asm:1008 No locals. #1 0x0000128856e09666 in x265::Lookahead::estimateCUPropagate (this=0x1287aa132e00, frames=0x12880802e9d0, averageDuration=0.040000000000000008, p0=17, p1=19, b=18, referenced=0) at /home/ports/pobj/x265-1.9/x265_1.9/source/encoder/slicetype.cpp:1737 cuIndex = 703 blocky = 19 refCosts = {0x128831c75800, 0x1287b0119000} distScaleFactor = 128 bipredWeight = 32 bipredWeights = {32, 32} listDist = {0, 0} propagateCost = 0x12886ab77000 fpsFactor = 0.99999999999999978 strideInCU = 37
-
r2 -> rdx, r5 -> r9; the values look right. Could you give me your sequence? I can then debug and fix it.
-
reporter What do you mean by sequence?
-
Account Deleted I think he is asking for the test sequence (input video) you used for encoding.
-
reporter If that is what he meant then there is nothing special about what I am doing. I just convert any video I have around to Y4M using FFmpeg ("ffmpeg -i input.[mkv|mp4|avi] output.y4m") and then just run "x265 input.y4m output.hevc".
-
Yes, I want your input.y4m. Clipping it to the minimum number of frames would be better.
-
reporter -
Thanks for your data. I tested it on Windows 7 and RHEL 7; no crash on my Haswell 4770K.
-
reporter That's not surprising at all. As I said earlier, if anyone wants access to my test box to see what's up let me know and I will provide you with access.
-
Hopefully the India team can help us try it on OpenBSD; if they can reproduce it, I can fix it soon.
-
Account Deleted We have installed OpenBSD here. Surprisingly, I am seeing a crash with the "--no-asm" option as well. I am debugging the issue to find the root cause.
-
reporter Curious, did you install amd64 or i386?
-
Account Deleted I installed i386.
-
reporter Ok, the reason I ask is i386 sometimes exposes issues that won't show up on amd64. Does the crash you're seeing with --no-asm happen to be a stack protector related crash?
-
Account Deleted GNU gdb (GDB) 7.9.1 Copyright (C) 2015 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. Type "show copying" and "show warranty" for details. This GDB was configured as "i386-unknown-openbsd5.8". Type "show configuration" for configuration details. For bug reporting instructions, please see: <http://www.gnu.org/software/gdb/bugs/>. Find the GDB manual and other documentation resources online at: <http://www.gnu.org/software/gdb/documentation/>. For help, type "help". Type "apropos word" to search for commands related to "word"... Reading symbols from x265...done. [New process 26331] [New process 16640] [New process 16995] [New process 25660] [New process 29411] [New process 5564] [New process 17584] [New process 19230] Core was generated by `x265'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x15d8750d in (anonymous namespace)::_sa8d_8x8 ( pix1=0x78ac5400 "mjlmlmlprtrsustuwvvvvvvtuxxzzyy|llklmlmpqtsustuxyxwwyxytutyyxxzykimkmmpnrrssttv{|ywxz{zvwxxyzzxylklmmonprtrtvtvy|{zzzy~ttvxy{yz~mimlkplqpsttsuux|z{z|||uwwzzzz{{lmlmllmrruuuwuwy{|{{{|~wvwxzz{z{mlmlmnos"..., i_pix1=32, pix2=0x20 <error: Cannot access memory at address 0x20>, i_pix2=0) at /home/mcw/projects/x265/source/common/pixel.cpp:283 283 a0 = pix1[0] - pix2[0];
-
I guess the problem is with the OpenBSD type 'intptr_t'. @Dnyaneshwar, could you try changing 'intptr_t' to 'int' and check again?
-
FreeBSD 10.2 (32-bit), gcc 4.8 and llvm: can't reproduce.
-
Account Deleted https://patches.videolan.org/patch/10684/raw
Can you apply the above patch and check whether it still crashes?
-
reporter With the latest code plus that patch it resolves the crashing issues. I can now encode content using x265 with SSE2 / AVX / AVX2 as well as via FFmpeg. Thank you very much.
-
- changed status to resolved
asm: fix mbtree_propagate_cost asm failure, fixes #204
The SSE2 asm code reads and writes an extra 4 bytes if the loop counter is not a multiple of 2, as the SSE2 asm code processes 2 int values in a single iteration.
The AVX asm code reads and writes an extra 4, 8 or 12 bytes if the loop counter is not a multiple of 4, as the AVX asm code processes 4 int values in a single iteration.
→ <<cset a95e4de632bd>>
-
Thanks, folks - patch queued.
- Log in to comment