From: Project Hentai AI Date: Tue, 10 Jan 2023 21:03:08 +0000 (+0100) Subject: foo X-Git-Url: https://git.hentai-ai.org/?a=commitdiff_plain;h=HEAD;p=papers%2FNEKO.git%2F.git foo --- diff --git a/NEKO.aux b/NEKO.aux index 3b75a77..e442a25 100644 --- a/NEKO.aux +++ b/NEKO.aux @@ -1,8 +1,4 @@ \relax -\bibdata{ref} -\bibstyle{plain} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Furry chart with kemonomimi on the far left.}}{1}\protected@file@percent } -\newlabel{fig:furrychart}{{1}{1}} \@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}\protected@file@percent } \newlabel{sec:intro}{{I}{1}} \@writefile{toc}{\contentsline {section}{\numberline {II}Background}{1}\protected@file@percent } @@ -11,22 +7,28 @@ \newlabel{sec:dl}{{\mbox {II-A}}{1}} \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-B}}Kemonomimi}{1}\protected@file@percent } \newlabel{sec:kemonomimi}{{\mbox {II-B}}{1}} -\@writefile{toc}{\contentsline {section}{\numberline {III}Method}{1}\protected@file@percent } -\newlabel{sec:method}{{III}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Data Collection}{1}\protected@file@percent } -\newlabel{sec:datacollection}{{\mbox {III-A}}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}fast.ai}{1}\protected@file@percent } -\newlabel{sec:fastai}{{\mbox {III-B}}{1}} -\@writefile{toc}{\contentsline {section}{\numberline {IV}Design}{1}\protected@file@percent } -\newlabel{sec:design}{{IV}{1}} -\@writefile{toc}{\contentsline {section}{\numberline {V}Implementation}{1}\protected@file@percent } -\newlabel{sec:implementation}{{V}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-A}}Deep Learning with fast.ai}{1}\protected@file@percent } -\newlabel{sec:impl_deeplearning}{{\mbox {V-A}}{1}} -\@writefile{toc}{\contentsline {section}{\numberline {VI}Discussion}{1}\protected@file@percent } -\newlabel{sec:discussion}{{VI}{1}} 
-\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-A}}Limitations}{1}\protected@file@percent } -\newlabel{sec:limitations}{{\mbox {VI-A}}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-B}}Future Work}{1}\protected@file@percent } -\newlabel{sec:futurework}{{\mbox {VI-B}}{1}} -\@writefile{toc}{\contentsline {section}{\numberline {VII}Conclusion}{1}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Furry chart with kemonomimi on the far left.}}{1}\protected@file@percent } +\newlabel{fig:furrychart}{{1}{1}} +\bibdata{ref} +\bibstyle{plain} +\@writefile{toc}{\contentsline {section}{\numberline {III}Method}{2}\protected@file@percent } +\newlabel{sec:method}{{III}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-A}}Data Collection}{2}\protected@file@percent } +\newlabel{sec:datacollection}{{\mbox {III-A}}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {III-B}}fast.ai}{2}\protected@file@percent } +\newlabel{sec:fastai}{{\mbox {III-B}}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {IV}Implementation}{2}\protected@file@percent } +\newlabel{sec:implementation}{{IV}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {V}Results}{2}\protected@file@percent } +\newlabel{sec:results}{{V}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-A}}Training on Thumbnails}{2}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-B}}Inter-Distinguishability of Kemonomimi}{2}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-C}}Intra-Distinguishability of Kemonomimi}{2}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-D}}Kemonomimi Multi-classifier}{2}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{\numberline {VI}Discussion}{2}\protected@file@percent } 
+\newlabel{sec:discussion}{{VI}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-A}}Limitations}{2}\protected@file@percent } +\newlabel{sec:limitations}{{\mbox {VI-A}}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-B}}Future Work}{2}\protected@file@percent } +\newlabel{sec:futurework}{{\mbox {VI-B}}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {VII}Conclusion}{2}\protected@file@percent } diff --git a/NEKO.log b/NEKO.log index 41eb0b8..8edc353 100644 --- a/NEKO.log +++ b/NEKO.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2022.6.23) 23 JUL 2022 18:53 +This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2022.6.23) 10 JAN 2023 22:02 entering extended mode restricted \write18 enabled. %&-line parsing enabled. @@ -283,14 +283,14 @@ Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv e )) -LaTeX Font Info: Calculating math sizes for size <11> on input line 21. -LaTeX Font Info: Trying to load font information for U+msa on input line 21. +LaTeX Font Info: Calculating math sizes for size <11> on input line 25. +LaTeX Font Info: Trying to load font information for U+msa on input line 25. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsa.fd File: umsa.fd 2013/01/14 v3.01 AMS symbols A ) -LaTeX Font Info: Trying to load font information for U+msb on input line 21. +LaTeX Font Info: Trying to load font information for U+msb on input line 25. (/usr/share/texlive/texmf-dist/tex/latex/amsfonts/umsb.fd @@ -306,6 +306,10 @@ Overfull \hbox (6.0pt too wide) in paragraph at lines 52--53 [][] [] +[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} + + + <./img/furrychart.jpg>] No file NEKO.bbl. 
** Conference Paper ** @@ -318,29 +322,24 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -[1{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map} - - - <./img/furrychart.jpg>] -(./NEKO.aux) ) +[2] (./NEKO.aux) ) Here is how much of TeX's memory you used: - 3799 strings out of 481239 - 54421 string characters out of 5920377 - 297657 words of memory out of 5000000 - 19035 multiletter control sequences out of 15000+600000 + 3798 strings out of 481239 + 54399 string characters out of 5920377 + 299644 words of memory out of 5000000 + 19034 multiletter control sequences out of 15000+600000 574665 words of font info for 110 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 - 30i,11n,37p,413b,423s stack positions out of 5000i,500n,10000p,200000b,80000s -{/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc} -Output written on NEKO.pdf (1 page, 308166 bytes). + 30i,11n,37p,884b,423s stack positions out of 5000i,500n,10000p,200000b,80000s +{/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc} +Output written on NEKO.pdf (2 pages, 306306 bytes). PDF statistics: - 30 PDF objects out of 1000 (max. 8388607) - 20 compressed objects within 1 object stream + 29 PDF objects out of 1000 (max. 8388607) + 19 compressed objects within 1 object stream 0 named destinations out of 1000 (max. 500000) 6 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/NEKO.pdf b/NEKO.pdf index 8a12b08..bb00b49 100644 Binary files a/NEKO.pdf and b/NEKO.pdf differ diff --git a/NEKO.tex b/NEKO.tex index fdbc96c..813da28 100644 --- a/NEKO.tex +++ b/NEKO.tex @@ -16,6 +16,10 @@ \author{\IEEEauthorblockN{1\textsuperscript{st} hentai-ai} \IEEEauthorblockA{\textit{Project Hentai AI} \\ \url{https://www.hentai-ai.org}} +\and +\IEEEauthorblockN{2\textsuperscript{nd} ChatGPT} +\IEEEauthorblockA{\textit{OpenAI} \\ +\url{https://chat.openai.com/}} } \maketitle @@ -33,61 +37,63 @@ deep learning, kemonomimi, anime, animal ears, lewd, ecchi, hentai \section{Background} \label{sec:background} \subsection{Deep Learning} \label{sec:dl} +Deep learning is a subfield of machine learning that is inspired by the structure and function of the brain, specifically the neural networks that make up the brain. It involves training artificial neural networks on a large dataset, allowing the network to learn and make intelligent decisions on its own. Deep learning has been highly successful in a variety of applications, such as image and speech recognition, natural language processing, and even playing board games. It has outperformed traditional machine learning techniques in these areas due to its ability to learn and represent complex patterns in data. + +The basic building block of a deep learning model is the artificial neuron, which is inspired by the biological neuron in the brain. These artificial neurons are organized into layers, with each layer learning to recognize a particular pattern or feature in the data. The first layer might learn to recognize simple features, such as edges or shapes, while the second layer might learn to recognize more complex patterns using the features learned by the first layer. This hierarchical structure allows deep learning models to learn and represent increasingly complex patterns in the data. + +One of the key aspects of deep learning is the use of large labeled datasets to train the model. 
This allows the model to learn the relationships between the input data and the desired output, allowing it to make predictions on new, unseen data. Deep learning has also been successful due to the development of efficient algorithms and hardware specifically designed for training large neural networks. This has allowed for the creation of deeper and more complex models, leading to even better performance on a variety of tasks. + +Overall, deep learning has revolutionized the field of machine learning and has led to significant advances in a variety of applications. It has the potential to continue to drive innovation and lead to new breakthroughs in the future. \subsection{Kemonomimi} \label{sec:kemonomimi} -The term \emph{Kemonomimi} comes from the two japanese words for beast/animal (kemono) and ear (mimi). -According to Urban Dictionary\footnote{\url{https://www.urbandictionary.com/define.php?term=Kemonomimi}} the term refers to: - -\begin{quote} - \emph{a person with animalistic characteristics who is NOT a furry - These characteristics include, but are not limited to cat ears/tail, - fox ears/tail, puppy ears/tail, bunny ears/tail, deer/doe ears/tail, - and many more animals. These human animal hybrids can also have horns, - fangs, whiskers, and different color patterns. - Kemonomimi and Furries are two different things.} -\end{quote} - -It notably mentioned multiple times by Urban Dictionary that there is a distinction between kemonomimi and furry which is illustrated in Figure~\ref{fig:furrychart}. +The term \emph{Kemonomimi} comes from the two Japanese words for beast/animal (kemono) and ear (mimi), and is a term that refers to characters or people with animalistic features, such as cat or bunny ears. These features can include ears, tails, horns, fangs, and whiskers, and may be accompanied by different color patterns. 
It is important to note that kemonomimi should not be confused with the term ``furry,'' which refers to a subculture of people who enjoy dressing up as anthropomorphic animals. Figure~\ref{fig:furrychart} illustrates the distinction between kemonomimi and furry. + \begin{figure} \includegraphics[width=.5\textwidth]{img/furrychart.jpg} \caption{Furry chart with kemonomimi on the far left.} \label{fig:furrychart} \end{figure} -Kemonomimi can then be split into subcategories based on the animal. In this study the following categories of kemonomimi are within scope: -\begin{itemize} - \item Nekomimi (cat) - \item Inumimi (dog) - \item Usagimimi (bunny) - \item Okamimimi (wolf) - \item Kitsunemimi (fox) -\end{itemize} -We will also in this study focus on the ear feature and less on the tail. -Sometimes a character does not have biological animal ears on their head, but is instead wearing a headband with ears attached. Other times a character might both have normal human ears as well as biological animals. In this study we make no distinction and include all types. Training an AI to detect animal ear headbands is considered for future work in Section~\ref{sec:futurework}. +One of the unique features of kemonomimi is the ability to blend human and animal traits in a single character. This combination can convey a variety of meanings and themes, depending on the specific animal characteristics that are chosen. For example, a character with cat ears may be portrayed as cunning and agile, while a character with bunny ears may be depicted as cute and energetic. In addition to the symbolic meanings of different animal traits, kemonomimi characters may also be used to create a sense of otherness or otherworldliness, as they challenge the traditional boundaries between humans and animals. This can be particularly appealing in the context of fictional worlds or fantastical settings. 
Overall, the use of kemonomimi adds a rich layer of meaning and symbolism to characters and can contribute to the immersive quality of a story or work of art. + +In this study, we will focus on five specific categories of kemonomimi: nekomimi (cat), inumimi (dog), usagimimi (bunny), okamimimi (wolf), and kitsunemimi (fox). We will primarily examine the ear feature of these characters, although we will also consider the presence of tails. It is worth noting that some characters may not have authentic animal ears on their head, but may instead be wearing a headband with attached ears. Similarly, some characters may possess both human and animal ears. In this study, we will include all types of kemonomimi in our analysis, although we plan to explore the detection of animal ear headbands as a topic for future work (see Section~\ref{sec:futurework}). \section{Method} \label{sec:method} -First we establish if a model trained on low resolution images (thumbnails) can make accurate predictions on high resolution images (original). -This study can then be further split into three challenges. Can an AI learn to differentiate between: -\begin{enumerate} - \item Kemonomimi and Non-kemonomimi - \item Two types of kemonomimi - \item Multiple types of kemonomimi -\end{enumerate} +The goal of this study is to determine if a model trained on low resolution images (thumbnails) can make accurate predictions on high resolution images (originals). To achieve this, we will use a dataset of images that includes both low resolution thumbnails and high resolution originals. The model will be trained on the thumbnails and tested on the high resolution images to determine its performance. We will use a variety of evaluation metrics, such as accuracy and precision, to measure the model's performance on each challenge. + +Our study can be broken down into three challenges. The first challenge is to determine whether the model can distinguish between kemonomimi and non-kemonomimi images. 
The second challenge is to determine whether the model can differentiate between two types of kemonomimi. For example, we might want to know if the model can correctly identify images of nekomimi (cat ears) versus images of usagimimi (bunny ears). To test this, we will present the model with a series of images that contain either nekomimi or usagimimi features, and evaluate its ability to correctly classify each image. Finally, the third challenge is to determine whether the model can correctly classify multiple types of kemonomimi. + +Overall, the results of this study will provide insights into the capabilities of deep learning models when applied to the task of kemonomimi classification, and will help to identify any potential limitations or challenges in using low resolution images for training. This information can inform the development of future models and improve our understanding of the usefulness of low resolution images for machine learning tasks. \subsection{Data Collection} \label{sec:datacollection} -The collection of images for the dataset of this study utilizes a script built around the API of Gelbooru\footnote{\url{https://gelbooru.com/}}. -% TODO write about Gelboory -% TODO write about the API +The Gelbooru API is a powerful tool for collecting large amounts of data from an anime image board of the same name\footnote{\url{https://gelbooru.com/}}. The API allows developers to access a vast repository of images and metadata associated with those images, making it an ideal resource for building labeled datasets for deep learning applications. + +To efficiently collect data using the Gelbooru API, we implemented a web scraper that is able to search for specific tags and retrieve a large number of images that match those tags. For example, we might search for the tag ``nekomimi'' to retrieve a dataset of images that contain cat ears, or we might search for the tag ``kemonomimi'' to retrieve a more general dataset of images with animalistic features. 
+ +Instead of labeling each image, we sorted the images into folders using the tags as names. For example, all images with the ``nekomimi'' tag would be placed in a folder called ``nekomimi,'' and all images with the ``usagimimi'' tag would be placed in a folder called ``usagimimi.'' This allowed us to easily and efficiently organize the images into distinct categories. + +Overall, the Gelbooru API is a valuable resource for collecting large, labeled datasets for deep learning applications. Its powerful search capabilities and rich metadata make it an efficient and effective tool for building datasets that can be used to train and evaluate machine learning models. + % TODO write about the script \subsection{fast.ai} \label{sec:fastai} +Fast.ai is a research institute and online education platform that was founded in 2017. One of the key contributions of fast.ai is the development of the fastai library, which is a high-level interface for PyTorch. PyTorch is a powerful and flexible deep learning framework that offers a number of advanced features, such as support for distributed training and automatic differentiation. By using the fastai library, practitioners can take advantage of these features without having to deal with the complexity of PyTorch itself. + +The fastai library is designed to make the process of building and training deep learning models more intuitive and efficient. It provides a number of useful functions and abstractions that allow practitioners to easily build and train models without having to worry about the underlying technical details. For example, the library includes functions for loading and preprocessing data, creating and training models, and evaluating model performance. 
-\section{Design} \label{sec:design} \section{Implementation} \label{sec:implementation} All code is open source and can be found on GitWeb\footnote{\url{https://git.hentai-ai.org}} -\subsection{Deep Learning with fast.ai} \label{sec:impl_deeplearning} +\section{Results} \label{sec:results} +\subsection{Training on Thumbnails} +Compare accuracy of two models, one training on original images and the other training on the thumbnails. +\subsection{Inter-Distinguishability of Kemonomimi} +Kemonomimi VS Non-Kemonomimi +\subsection{Intra-Distinguishability of Kemonomimi} +Kemonomimi VS Kemonomimi +\subsection{Kemonomimi Multi-classifier} + \section{Discussion} \label{sec:discussion} diff --git a/img/fake_ears.png b/img/fake_ears.png new file mode 100644 index 0000000..8bcf707 Binary files /dev/null and b/img/fake_ears.png differ