% Ousat et al., WWW '24 (ACM). Page range uses the ASCII "--" separator so the
% entry is safe for classic 8-bit BibTeX as well as Biber.
@inproceedings{10.1145/3589334.3645619,
  author    = {Ousat, Behzad and Schafir, Esteban and Hoang, Duc C. and Tofighi, Mohammad Ali and Nguyen, Cuong V. and Arshad, Sajjad and Uluagac, Selcuk and Kharraz, Amin},
  title     = {The Matter of Captchas: An Analysis of a Brittle Security Feature on the Modern Web},
  booktitle = {Proceedings of the {ACM} on Web Conference 2024},
  series    = {{WWW} '24},
  pages     = {1835--1846},
  numpages  = {12},
  year      = {2024},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Singapore, Singapore},
  isbn      = {9798400701719},
  doi       = {10.1145/3589334.3645619},
  url       = {https://doi.org/10.1145/3589334.3645619},
  keywords  = {automated attacks, captcha, web bots, web security},
  abstract  = {The web ecosystem is a fast-paced environment. In this dynamic landscape, new security features are offered one after another to enhance the security and robustness of web applications and the operations they handle. This paper focuses on a fragile but still in-use security feature, text-based CAPTCHAs, that had been wildly used by web applications in the past to protect against automated attacks such as credential stuffing and account hijacking. The paper first investigates what it takes to develop automated scanners that can solve previously unseen text-based CAPTCHAs. We evaluated the possibility of developing and integrating a pre-trained CAPTCHA solver in the automated web scanning process without using a significantly large training dataset. We also perform an analysis of the impact of such autonomous scanners on CAPTCHA-enabled websites. Our analysis shows that solvable text-based CAPTCHAs on login, contact, and comment pages of websites are not uncommon. In particular, we identified over 3,100 text-based CAPTCHA websites in critical sectors such as finance, government, and health with hundreds of thousands of users. We showed that a web scanner with a pre-trained solver could solve more than 20\% of previously unseen CAPTCHAs in just one single attempt. This result is worrisome considering the substantial potential to autonomously run the operation across thousands of websites on a daily basis with minimal training. The findings suggest that the integration of autonomous scanning with pre-training and local optimization of models can significantly increase adversaries' asymmetric power to launch their attacks cheaper and faster.},
}