From: anon Date: Sun, 5 Jun 2022 18:54:18 +0000 (+0200) Subject: X1 waifu paper update X-Git-Url: https://git.hentai-ai.org/?a=commitdiff_plain;h=59c2cbd3f7616562d8c536b0a67ea3c00ca50a30;p=papers%2FwAiFu.git%2F.git X1 waifu paper update --- diff --git a/code/dataprep.py b/code/dataprep.py new file mode 100644 index 0000000..96e7dbf --- /dev/null +++ b/code/dataprep.py @@ -0,0 +1,40 @@ +import os +from PIL import Image + +path = "dataset_thighs" +label = "thighs" + +def CountDone(dataset_dir, label): + done_counter = 0 + for f in os.listdir(dataset_dir): + if f.startswith(label + "-"): + done_counter += 1 + return done_counter + +def ConvertFilesToPng(dataset_dir): + print(f"Converting images in {dataset_dir} to .png") + for f in tqdm(os.listdir(dataset_dir)): + if f.endswith(".jpg") or f.endswith(".jpeg"): + im1 = Image.open(os.path.join(dataset_dir, f)) + im1.save(os.path.join(dataset_dir, f.split('.')[0] + ".png")) + os.remove(os.path.join(dataset_dir, f)) + +def RenameFiles(dataset_dir, label): + print(f"Renaming images in {dataset_dir} to {label}-id") + counter = CountDone(dataset_dir, label) + image_list = [] + for f in tqdm(os.listdir(dataset_dir)): + if f.endswith(".png") or f.endswith(".PNG"): + src = os.path.join(dataset_dir, f) + if not f.startswith(label + "-"): + dst = os.path.join(dataset_dir, label + "-" + str(counter).zfill(4) + ".png") + os.rename(src, dst) + image_list.append(dst) + counter += 1 + else: + image_list.append(src) + return image_list + +ConvertFilesToPng(path) +file_list = RenameFiles(path, label) +print(len(file_list)) diff --git a/img/crop1.png b/img/crop1.png new file mode 100644 index 0000000..e3b803d Binary files /dev/null and b/img/crop1.png differ diff --git a/img/crop2.png b/img/crop2.png new file mode 100644 index 0000000..b9cead6 Binary files /dev/null and b/img/crop2.png differ diff --git a/img/tinder.png b/img/tinder.png new file mode 100644 index 0000000..c7dc3ae Binary files /dev/null and b/img/tinder.png differ diff 
--git a/wAiFu.aux b/wAiFu.aux index a135bb5..9fa7d92 100644 --- a/wAiFu.aux +++ b/wAiFu.aux @@ -26,9 +26,9 @@ \newlabel{sec:ai}{{\mbox {II-A}}{1}{Artificial Intelligence}{subsection.2.1}{}} \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Relation between Artificial Intelligence, Machine Learning and Deep Learning.}}{1}{figure.1}\protected@file@percent } \newlabel{fig:ai}{{1}{1}{Relation between Artificial Intelligence, Machine Learning and Deep Learning}{figure.1}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-B}}Hentai and Thighdeology}{1}{subsection.2.2}\protected@file@percent } -\newlabel{sec:hentai}{{\mbox {II-B}}{1}{Hentai and Thighdeology}{subsection.2.2}{}} \citation{thighdeology} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {II-B}}Hentai and Thighdeology}{2}{subsection.2.2}\protected@file@percent } +\newlabel{sec:hentai}{{\mbox {II-B}}{2}{Hentai and Thighdeology}{subsection.2.2}{}} \@writefile{toc}{\contentsline {section}{\numberline {III}Method}{2}{section.3}\protected@file@percent } \newlabel{sec:method}{{III}{2}{Method}{section.3}{}} \@writefile{toc}{\contentsline {section}{\numberline {IV}Design}{2}{section.4}\protected@file@percent } @@ -56,23 +56,38 @@ \@writefile{toc}{\contentsline {subsubsection}{\numberline {\mbox {V-A}2}Cropping Images}{3}{subsubsection.5.1.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-B}}Label App: Hentai Tinder}{3}{subsection.5.2}\protected@file@percent } \newlabel{sec:impl_labelapp}{{\mbox {V-B}}{3}{Label App: Hentai Tinder}{subsection.5.2}{}} -\@writefile{lol}{\contentsline {lstlisting}{csv/test1.csv}{4}{lstlisting.-1}\protected@file@percent } -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Structure of output file using comma separated values}}{4}{figure.4}\protected@file@percent } -\newlabel{fig:csv}{{4}{4}{Structure of output file using comma separated values}{figure.4}{}} 
-\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Visualization of overfitting (Andrew Ng's Machine Learning Coursera class)}}{4}{figure.5}\protected@file@percent } -\newlabel{fig:overfitting}{{5}{4}{Visualization of overfitting (Andrew Ng's Machine Learning Coursera class)}{figure.5}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Image before cropping with application}}{4}{figure.4}\protected@file@percent } +\newlabel{fig:crop1}{{4}{4}{Image before cropping with application}{figure.4}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Image after cropping with application}}{4}{figure.5}\protected@file@percent } +\newlabel{fig:crop2}{{5}{4}{Image after cropping with application}{figure.5}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {V-C}}Deep Learning with fastai}{4}{subsection.5.3}\protected@file@percent } \newlabel{sec:impl_deeplearning}{{\mbox {V-C}}{4}{Deep Learning with fastai}{subsection.5.3}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Hentai Tinder}}{4}{figure.6}\protected@file@percent } +\newlabel{fig:tinder}{{6}{4}{Hentai Tinder}{figure.6}{}} +\@writefile{lol}{\contentsline {lstlisting}{csv/test1.csv}{4}{lstlisting.-1}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Structure of output file using comma separated values}}{4}{figure.7}\protected@file@percent } +\newlabel{fig:csv}{{7}{4}{Structure of output file using comma separated values}{figure.7}{}} \@writefile{toc}{\contentsline {section}{\numberline {VI}Results}{4}{section.6}\protected@file@percent } \newlabel{sec:results}{{VI}{4}{Results}{section.6}{}} \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-A}}Justifying Additional Transforms}{4}{subsection.6.1}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VI-B}}Error Rate of Thighs}{4}{subsection.6.2}\protected@file@percent } 
-\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Training image without batch transforms}}{4}{figure.6}\protected@file@percent } -\newlabel{fig:wobt}{{6}{4}{Training image without batch transforms}{figure.6}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Training image with batch transforms}}{4}{figure.7}\protected@file@percent } -\newlabel{fig:wbt}{{7}{4}{Training image with batch transforms}{figure.7}{}} -\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Comparing with and without batch transforms on error\_rate, train\_loss and valid\_loss}}{4}{figure.8}\protected@file@percent } -\newlabel{fig:btgraph}{{8}{4}{Comparing with and without batch transforms on error\_rate, train\_loss and valid\_loss}{figure.8}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Training image without batch transforms}}{5}{figure.8}\protected@file@percent } +\newlabel{fig:wobt}{{8}{5}{Training image without batch transforms}{figure.8}{}} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Training image with batch transforms}}{5}{figure.9}\protected@file@percent } +\newlabel{fig:wbt}{{9}{5}{Training image with batch transforms}{figure.9}{}} +\@writefile{toc}{\contentsline {section}{\numberline {VII}Discussion}{5}{section.7}\protected@file@percent } +\newlabel{sec:discussion}{{VII}{5}{Discussion}{section.7}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VII-A}}Limitations}{5}{subsection.7.1}\protected@file@percent } +\newlabel{sec:limitations}{{\mbox {VII-A}}{5}{Limitations}{subsection.7.1}{}} +\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces User Training}}{5}{table.1}\protected@file@percent } +\newlabel{tab:user-train}{{I}{5}{User Training}{table.1}{}} +\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces User Testing}}{5}{table.2}\protected@file@percent } +\newlabel{tab:user-test}{{II}{5}{User Testing}{table.2}{}} 
+\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Comparing with and without batch transforms on error\_rate, train\_loss and valid\_loss}}{5}{figure.10}\protected@file@percent } +\newlabel{fig:btgraph}{{10}{5}{Comparing with and without batch transforms on error\_rate, train\_loss and valid\_loss}{figure.10}{}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VII-B}}Future Work}{5}{subsection.7.2}\protected@file@percent } +\newlabel{sec:futurework}{{\mbox {VII-B}}{5}{Future Work}{subsection.7.2}{}} +\@writefile{toc}{\contentsline {section}{\numberline {VIII}Conclusion}{5}{section.8}\protected@file@percent } \bibdata{ref} \bibcite{tkinter}{1} \bibcite{zoom-advanced}{2} @@ -81,13 +96,4 @@ \bibcite{machinelearning}{5} \bibcite{thighdeology}{6} \bibstyle{plain} -\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces User stats}}{5}{table.1}\protected@file@percent } -\newlabel{tab:user-table}{{I}{5}{User stats}{table.1}{}} -\@writefile{toc}{\contentsline {section}{\numberline {VII}Discussion}{5}{section.7}\protected@file@percent } -\newlabel{sec:discussion}{{VII}{5}{Discussion}{section.7}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VII-A}}Limitations}{5}{subsection.7.1}\protected@file@percent } -\newlabel{sec:limitations}{{\mbox {VII-A}}{5}{Limitations}{subsection.7.1}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\mbox {VII-B}}Future Work}{5}{subsection.7.2}\protected@file@percent } -\newlabel{sec:futurework}{{\mbox {VII-B}}{5}{Future Work}{subsection.7.2}{}} -\@writefile{toc}{\contentsline {section}{\numberline {VIII}Conclusion}{5}{section.8}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{References}{5}{section*.2}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{References}{6}{section*.2}\protected@file@percent } diff --git a/wAiFu.log b/wAiFu.log index deba8a3..0472013 100644 --- a/wAiFu.log +++ b/wAiFu.log @@ -1,4 +1,4 @@ -This is 
pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.10.22) 31 MAY 2022 11:22 +This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.10.22) 5 JUN 2022 20:53 entering extended mode restricted \write18 enabled. %&-line parsing enabled. @@ -477,67 +477,88 @@ Package pdftex.def Info: img/ai_diagram.pdf used on input line 54. <./img/ai_diagram.pdf>] +[2] pdfTeX warning: pdflatex (file ./img/thighs_diagram.drawio.pdf): PDF inclusion: found PDF version <1.7>, but at most version <1.5> allowed - + File: img/thighs_diagram.drawio.pdf Graphic file (type pdf) -Package pdftex.def Info: img/thighs_diagram.drawio.pdf used on input line 123. +Package pdftex.def Info: img/thighs_diagram.drawio.pdf used on input line 122. (pdftex.def) Requested size: 258.0pt x 161.67961pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 123--124 +Overfull \hbox (6.0pt too wide) in paragraph at lines 122--123 [][] [] -[2] LaTeX Font Info: Trying to load font information for OT1+pcr on input line 1 -35. - (/usr/share/texlive/texmf-dist/tex/latex/psnfss/ot1pcr.fd +34. +(/usr/share/texlive/texmf-dist/tex/latex/psnfss/ot1pcr.fd File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr. ) File: img/data_sets.png Graphic file (type png) -Package pdftex.def Info: img/data_sets.png used on input line 138. +Package pdftex.def Info: img/data_sets.png used on input line 137. (pdftex.def) Requested size: 258.0pt x 61.5058pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 138--139 +Overfull \hbox (6.0pt too wide) in paragraph at lines 137--138 [][] [] -(./csv/test1.csv) [3 <./img/thighs_diagram.drawio.pdf> <./img/data_sets.png (PN -G copy)>] - -File: img/overfitting.png Graphic file (type png) - -Package pdftex.def Info: img/overfitting.png used on input line 182. -(pdftex.def) Requested size: 232.19843pt x 87.40929pt. 
- + +File: img/crop1.png Graphic file (type png) + +Package pdftex.def Info: img/crop1.png used on input line 155. +(pdftex.def) Requested size: 154.80157pt x 164.05174pt. + +File: img/crop2.png Graphic file (type png) + +Package pdftex.def Info: img/crop2.png used on input line 161. +(pdftex.def) Requested size: 154.80157pt x 164.05174pt. + +LaTeX Warning: `h' float specifier changed to `ht'. + + +LaTeX Warning: `h' float specifier changed to `ht'. + + +File: img/tinder.png Graphic file (type png) + +Package pdftex.def Info: img/tinder.png used on input line 171. +(pdftex.def) Requested size: 154.80157pt x 234.61816pt. +[3 <./img/thighs_diagram.drawio.pdf> <./img/data_sets.png (PNG copy)>] + +LaTeX Warning: `h' float specifier changed to `ht'. + +(./csv/test1.csv) +Underfull \vbox (badness 1173) has occurred while \output is active [] + + File: img/no_batch_transform1.png Graphic file (type png) -Package pdftex.def Info: img/no_batch_transform1.png used on input line 188. +Package pdftex.def Info: img/no_batch_transform1.png used on input line 200. (pdftex.def) Requested size: 232.19843pt x 96.04327pt. - + File: img/with_batch_transform2.png Graphic file (type png) -Package pdftex.def Info: img/with_batch_transform2.png used on input line 194. +Package pdftex.def Info: img/with_batch_transform2.png used on input line 206. (pdftex.def) Requested size: 232.19843pt x 96.04327pt. - + File: img/with_vs_without_batch_transforms.png Graphic file (type png) Package pdftex.def Info: img/with_vs_without_batch_transforms.png used on inpu -t line 200. -(pdftex.def) Requested size: 258.0pt x 247.09715pt. +t line 212. +(pdftex.def) Requested size: 242.52063pt x 232.26645pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 200--201 -[][] - [] -[4 <./img/overfitting.png> <./img/no_batch_transform1.png> <./img/with_batch_tr -ansform2.png> <./img/with_vs_without_batch_transforms.png>] (./wAiFu.bbl) +LaTeX Warning: `h' float specifier changed to `ht'. 
+ +[4 <./img/crop1.png> <./img/crop2.png> <./img/tinder.png>] [5 <./img/no_batch_t +ransform1.png> <./img/with_batch_transform2.png> <./img/with_vs_without_batch_t +ransforms.png>] (./wAiFu.bbl) ** Conference Paper ** Before submitting the final camera ready copy, remember to: @@ -549,36 +570,38 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 238. -[5 +Package atveryend Info: Empty hook `BeforeClearDocument' on input line 271. +[6 ] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 238. +Package atveryend Info: Empty hook `AfterLastShipout' on input line 271. (./wAiFu.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 238. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 238. +Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 271. +Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 271. Package rerunfilecheck Info: File `wAiFu.out' has not changed. (rerunfilecheck) Checksum: 607914959793BD1A383D08B0B432B5EB;1439. -Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 238. +Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 271. 
) Here is how much of TeX's memory you used: - 9463 strings out of 483183 - 139816 string characters out of 5966291 - 415079 words of memory out of 5000000 - 24256 multiletter control sequences out of 15000+600000 + 9483 strings out of 483183 + 140046 string characters out of 5966291 + 408176 words of memory out of 5000000 + 24269 multiletter control sequences out of 15000+600000 579147 words of font info for 116 fonts, out of 8000000 for 9000 14 hyphenation exceptions out of 8191 34i,11n,37p,1355b,1162s stack positions out of 5000i,500n,10000p,200000b,80000s {/usr/share/texlive/texmf-dist/fonts/enc/dvips/base/8r.enc} -Output written on wAiFu.pdf (5 pages, 1374624 bytes). +e/texmf-dist/fonts/type1/public/amsfonts/cm/cmmi10.pfb>< +/usr/share/texlive/texmf-dist/fonts/type1/urw/times/utmr8a.pfb> +Output written on wAiFu.pdf (6 pages, 2106818 bytes). PDF statistics: - 262 PDF objects out of 1000 (max. 8388607) - 225 compressed objects within 3 object streams - 59 named destinations out of 1000 (max. 500000) - 228 words of extra memory for PDF output out of 10000 (max. 10000000) + 291 PDF objects out of 1000 (max. 8388607) + 246 compressed objects within 3 object streams + 64 named destinations out of 1000 (max. 500000) + 238 words of extra memory for PDF output out of 10000 (max. 10000000) diff --git a/wAiFu.pdf b/wAiFu.pdf index 248f34e..d6c9055 100644 Binary files a/wAiFu.pdf and b/wAiFu.pdf differ diff --git a/wAiFu.tex b/wAiFu.tex index accd203..3caf141 100644 --- a/wAiFu.tex +++ b/wAiFu.tex @@ -24,7 +24,7 @@ \maketitle \begin{abstract} - For too many years have the world of Artificial Intelligence and the world of Hentai been separate ecosystems in which they do not realize the powerful potential of an alliance. Project Hentai AI aims to bring Artificial Intelligence into the sphere of Hentai, Ecchi and Lewds. In this paper, we propose a Witty Artificial Intelligence Framework Utilization (wAiFu). 
This framework is built for processing and labeling data, as well as training machine learning models to rate images of lewd anime/manga and hentai. As a proof of concept, this framework is applied to images of lewd anime thighs labeled using a boolean method. A dataset is collected, processed and labeled before being loaded into a fastai implementation of a Convolutional Neural Network designed for Computer Vision. The retraining of a resnet34 model for 10 epoch resulted in an accuracy of 70\%, which is much better than a cointoss. + For too many years have the world of Artificial Intelligence and the world of Hentai been separate ecosystems in which they do not realize the powerful potential of an alliance. Project Hentai AI aims to bring Artificial Intelligence into the sphere of Hentai, Ecchi and Lewds. In this paper, we propose a Witty Artificial Intelligence Framework Utilization (wAiFu). This framework is built for processing and labeling data, as well as training machine learning models to classify images of lewd anime/manga and hentai based on subjective user rating. As a proof of concept, this framework is applied to images of lewd anime thighs labeled using a boolean method. A dataset of 1000 images is collected, processed and labeled before being loaded into a fastai implementation of a Convolutional Neural Network designed for Computer Vision. The retraining of a resnet34 model for 20 epochs using labels from three different users resulted in an accuracy of over 70\%. Furthermore, a couple of limitations were identified, most significantly the small size of the dataset could cause the model to overfit. As mitigation, the data was transformed in batches. Future work in Project Hentai AI will focus extra on upscaling the data collection phase. 
\end{abstract} \begin{IEEEkeywords} @@ -60,7 +60,7 @@ Machine Learning and Deep Learning falls under the discipline of Artificial Inte \subsection{Hentai and Thighdeology} \label{sec:hentai} For the purpose of this study and future studies in Project Hentai AI, the data in the datasets are categorised in three definitions: \emph{Hentai}, \emph{Ecchi} and \emph{Lewd}. -In its simplest definition, Hentai is simply anime and manga pornography and can be seen as the most extreme out of the three. Ecchi on the other hand, when used as an adjective, translates to ``sexy'', ``dirty'' or ``naughty'', and has been used to describe anime and manga with \emph{sexual overtones} (playful sexuality or softcore). Lewd in these studies is defined as \emph{sexual undertones}. A detailed differentiation between these three categories is planned for a separate study. Project Hentai AI includes ecchi and lewd as well even though the name of the project uses the term hentai. +In its simplest definition, Hentai is simply anime and manga pornography and can be seen as the most extreme out of the three. Ecchi on the other hand, when used as an adjective, translates to ``sexy'', ``dirty'' or ``naughty'', and has been used to describe anime and manga with \emph{sexual overtones} (playful sexuality or softcore). Lewd in these studies is defined as \emph{sexual undertones}. A detailed differentiation between the three categories is planned for a separate study. Project Hentai AI will include ecchi and lewds, even though the name of the project uses the term hentai. Thighdeology is the worship of thick anime thighs which has its Mecca on the Thighdeology subreddit~\cite{thighdeology}. The top two rules on the subreddit are: (1) All images must be thigh-focused and (2) No Pictures of Sex (Nudity is allowed). The second rule is a clear demonstration of the distinction between hentai and ecchi described above. 
The dataset used for wAiFu is images of lewd anime thighs in accordance with these two rules. @@ -72,10 +72,9 @@ The epigraph which crowns the website says it all: \section{Method} \label{sec:method} \emph{wAiFu} stands for Witty Artificial Intelligence Framework Utilization, and its goal is to standardize the process of creating a subjectively labeled dataset for machine learning. This means that a single set of images can be used as separate datasets depending on the subjective labeling. A system is set up for homogenizing the images (filename and file extensions), cropping the images to isolate the area of interest as much as possible and finally labeling the images using a separate file for mapping each filename to its subjective labeling. -% Maybe more here \section{Design} \label{sec:design} -The following section describes the design of wAiFu in its separate parts in detail. The data collection, the data preparation, the data labeling and finally the machine learning API. +The following section describes the design of wAiFu in its separate components in detail: the data collection, the data preparation, the data labeling and finally the machine learning API. \subsection{Data Collection} \label{sec:datacollection} \noindent A dataset of lewd anime thighs was manually collected from six separate sources: @@ -88,7 +87,7 @@ The following section describes the design of wAiFu in its separate parts in det \item Private Donations \end{itemize} -~\\\noindent After collection, the data was manually screened for (A) presence of thighs (B) image quality and (C) image \emph{cropability}. The presence of thighs simply implies that the image in question contains a section of the lower body of a humanoid character. 
The vast majority of the characters depicted in the images collected were of the feminine nature, although this was most likely due to the skewed ratio of feminine/masculine thighs from the sources themselves and not due to any discrimination during the manual collecting. This is further discussed within limitations in Section~\ref{sec:limitations}. +~\\\noindent After collection, the data was manually screened for (A) presence of thighs (B) image quality and (C) image \emph{cropability}. The presence of thighs simply implies that the image in question contains a section of the lower body of a humanoid character. The vast majority of the characters depicted in the images collected were of the feminine nature, although this was most likely due to the skewed ratio of feminine/masculine thighs from the sources themselves and not due to any discrimination during the manual collecting. This is included within future work in Section~\ref{sec:futurework}. Image quality refers to the resolution of the picture. When finding duplicates, the one with higher resolution was kept. Some images where included in the dataset even if the quality of the resolution was below average due to either its content or source. @@ -115,7 +114,7 @@ The data preparation implementation is detailed in Section~\ref{sec:datatfms}. The labeling of datasets in wAiFu is categorised in two different methods: \begin{itemize} \item Boolean labeling - \item Rate labeling + \item Scale labeling \end{itemize} The \emph{Boolean labeling} consist of two disjunctive values (e.g., True/False, Yes/No, Approved/Disapproved, 1/0) which is the closest to the reviews previously gotten when brokering pictures of anime thighs manually. An image would be sent and an Approved/Disapproved would be received in return. A diagram example is seen in Figure~\ref{fig:protocol}. 
@@ -125,7 +124,7 @@ The \emph{Boolean labeling} consist of two disjunctive values (e.g., True/False, \label{fig:protocol} \end{figure} -The \emph{Rate labeling} ranks the images on a scale (e.g., 0-10, 1-5, A-F). This could be considered to be an extension of Boolean labeling (which would be seen as a scale of 0-1) by adding float values in between. +The \emph{Scale labeling} ranks the images on a scale (e.g., 0-10, 1-5, A-F). This could be considered to be an extension of Boolean labeling (which would be seen as a scale of 0-1) by adding float values in between. The data labeling implementation is detailed in Section~\ref{sec:impl_labelapp} @@ -147,17 +146,32 @@ The code of all tools in Project Hentai AI is open source and can be found at \u The following section goes through the implementation of homogenizing the dataset, including renaming, changing extensions and cropping the images. \subsubsection{Convert and Rename} -% Add git link -Talk about the script for making the dataset homogeneous. +The homogenization of the dataset is done mainly with two functions: \texttt{RenameFiles()} and \texttt{ConvertFilesToPng()}. All image files in the dataset are renamed to the naming convention detailed in Section~\ref{sec:dataprep} unless already matching the pattern. The images are also converted to png-files if they are not already. \subsubsection{Cropping Images} -The application for efficiently cropping the images manually was built ontop of a zooming-application~\cite{zoom-advanced} which utilizes tiling for increased performance. The frame border of the application window was set to a 1:1 aspect ratio with desired dimensions and could then easily be used to crop every image from a specified input directory, and put the cropped images in a separate (or in the same) destination directory. -% Add screenshot +The application for efficiently cropping the images manually was built on top of a zooming-application~\cite{zoom-advanced} which utilizes tiling for increased performance. 
The frame border of the application window was set to a 1:1 aspect ratio with desired dimensions and could then easily be used to crop every image from a specified input directory, and put the cropped images in a separate (or in the same) destination directory. Figure~\ref{fig:crop1} and Figure~\ref{fig:crop2} show an image before and after cropping with the application. +\begin{figure}[h] + \centering + \includegraphics[width=.3\textwidth]{img/crop1.png} + \caption{Image before cropping with application} + \label{fig:crop1} +\end{figure} +\begin{figure}[h] + \centering + \includegraphics[width=.3\textwidth]{img/crop2.png} + \caption{Image after cropping with application} + \label{fig:crop2} +\end{figure} The code is open source and can be found at: \url{https://git.hentai-ai.org/?p=hentai-cropper.git/.git} \subsection{Label App: Hentai Tinder} \label{sec:impl_labelapp} -The name of the label application is ``Hentai Tinder''. It is written in Python using the Tkinter library, a Python binding to the Tk GUI toolkit~\cite{tkinter}. The application loads the images in batches and presents each image to the user one-by-one. The GUI consists of four buttons, inspired by the original Tinder application: \emph{Smash, Pass, Go Back} and \emph{Save}. \emph{Smash} will label the image as ``True'' and \emph{Pass} will label it as ``False''. These are internal boolean values which represents if the user liked the image or not. \emph{Go Back} was added in order for users to change their mind about previous images. The \emph{Save} button simply writes the current results to the output file. The output of the Hentai Tinder application is a csv file which can be used in fastai to create a dataloader with all the images including their labels, see Figure~\ref{fig:csv}. -% Add screenshot +The name of the label application is ``Hentai Tinder'' (see Figure~\ref{fig:tinder}). It is written in Python using the Tkinter library, a Python binding to the Tk GUI toolkit~\cite{tkinter}. 
The application loads the images in batches and presents each image to the user one-by-one. The GUI consists of four buttons, inspired by the original Tinder application: \emph{Smash, Pass, Go Back} and \emph{Save}. \emph{Smash} will label the image as ``True'' and \emph{Pass} will label it as ``False''. These are internal boolean values which represent whether the user liked the image or not. \emph{Go Back} was added in order for users to change their mind about previous images. The \emph{Save} button simply writes the current results to the output file. The output of the Hentai Tinder application is a csv file which can be used in fastai to create a dataloader with all the images including their labels, see Figure~\ref{fig:csv}. +\begin{figure}[h] + \centering + \includegraphics[width=.3\textwidth]{img/tinder.png} + \caption{Hentai Tinder} + \label{fig:tinder} +\end{figure} \begin{figure} \centering @@ -168,21 +182,19 @@ The name of the label application is ``Hentai Tinder''. It is written in Python The code is open source and can be found at: \url{https://git.hentai-ai.org/?p=hentai-tinder.git/.git} \subsection{Deep Learning with fastai} \label{sec:impl_deeplearning} -The deep learning framework (fastai) was implemented using interactive python notebooks running on Google Colab\footnote{url{https://colab.research.google.com}} connected to Google Drive\footnote{\url{https://drive.google.com}} for storing csv-files, dataset and trained models. +The deep learning framework (fastai) was implemented using interactive Python notebooks running on Google Colab\footnote{\url{https://colab.research.google.com}} connected to Google Drive\footnote{\url{https://drive.google.com}} for storing csv-files, dataset and trained models. 
The notebook is open source and can be found at: \url{https://git.hentai-ai.org/?p=waifu-notebook.git/.git} % TODO add to git \section{Results} \label{sec:results} \subsection{Justifying Additional Transforms} -One of the main observations when training on such a small dataset was the tendency to overfitting (see Figure~\ref{fig:overfitting}). There are two types of transformations applied to the dataset before training: \texttt{item\_tfms} and \texttt{batch\_tfms}. The item\_tfms for this implementation is using \textit{RandomResizedCrop} which will crop every image randomly to 224x244 with a minimum scaling of 0.75. The batch\_tfms is applying many more tranformations to images in batches between each epoch. These transformations include: zooming, flipping, rotating and changing the brightness. Figure~\ref{fig:wobt} shows how \emph{only} item\_tfms transform the dataset. Figure~\ref{fig:wbt} shows how batch\_tfms additionally transforms the dataset further. Figure~\ref{fig:btgraph} shows the batch\_tfms's effect on error\_rate, train\_loss and valid\_loss. - +One of the main observations when training on such a small dataset was the tendency to overfit. There are two types of transformations applied to the dataset before training: \texttt{item\_tfms} and \texttt{batch\_tfms}. The item\_tfms for this implementation is using \textit{RandomResizedCrop} which will crop every image randomly to 224x244 with a minimum scaling of 0.75. The batch\_tfms is applying many more transformations to images in batches between each epoch. These transformations include: zooming, flipping, rotating and changing the brightness. Figure~\ref{fig:wobt} shows how \emph{only} item\_tfms transform the dataset. Figure~\ref{fig:wbt} shows how batch\_tfms additionally transforms the dataset further. Figure~\ref{fig:btgraph} shows the batch\_tfms's effect on error\_rate, train\_loss and valid\_loss. 
- -\begin{figure} - \fbox{\includegraphics[width=.45\textwidth]{img/overfitting.png}} - \caption{Visualization of overfitting (Andrew Ng's Machine Learning Coursera class)} - \label{fig:overfitting} -\end{figure} +%\begin{figure} +% \fbox{\includegraphics[width=.45\textwidth]{img/overfitting.png}} +% \caption{Visualization of overfitting (Andrew Ng's Machine Learning Coursera class)} +% \label{fig:overfitting} +%\end{figure} \begin{figure} \fbox{\includegraphics[width=.45\textwidth]{img/no_batch_transform1.png}} @@ -196,41 +208,62 @@ One of the main observations when training on such a small dataset was the tende \label{fig:wbt} \end{figure} -\begin{figure} - \includegraphics[width=.5\textwidth]{img/with_vs_without_batch_transforms.png} +\begin{figure}[h] + \includegraphics[width=.47\textwidth]{img/with_vs_without_batch_transforms.png} \caption{Comparing with and without batch transforms on error\_rate, train\_loss and valid\_loss} \label{fig:btgraph} \end{figure} \subsection{Error Rate of Thighs} -The dataset containing 1000 images was labled using Hentai Tinder (Section~\ref{sec:impl_labelapp}) by three individual persons: User A, User B and User C. A table of the result for each user can be seen in Table~\ref{tab:user-table}. The three different users had varying rates of approval on the dataset with user C liking almost half of the dataset. The lowest error\_rate observed came from the dataset labled by user B. With the error\_rate being close to the rate of approval, a sanity check with a confusion matrix showed that the model did not just predict false on the whole dataset. +The dataset containing 1000 images was labeled using Hentai Tinder (Section~\ref{sec:impl_labelapp}) by three individual persons: User A, User B and User C. A table of the training result after 20 epochs for each user can be seen in Table~\ref{tab:user-train}. The three different users had varying rates of approval on the dataset with user C liking almost half of the dataset. 
The lowest error\_rate observed came from the dataset labeled by user B. With the error\_rate being close to the rate of approval, a sanity check with a confusion matrix showed that the model did not just predict false on the whole dataset. +In Table~\ref{tab:user-test} we show the true/false positive/negative results on the testing set for each user. Furthermore, we show the accuracy on the testing set using: + +\begin{equation} + \frac{TP+TN}{TP+TN+FP+FN} +\end{equation} -\begin{table*} +\begin{table} \centering -\begin{tabular}{l|ccccccc} - User & Approved & valid err & TP & FP & TN & FN & test err\\ \hline - A & 33.00\% & 26.87\% & 31 & 19 & 116 & 34 & \\ - B & 13.22\% & 22.00\% & 1 & 6 & 76 & 16 & \\ - C & 49.30\% & 28.36\% & 72 & 22 & 72 & 34 & +\caption{User Training} +\begin{tabular}{l|c|c|c|c} + User & Approved & train loss & valid loss & valid err \\ \hline + A & 33.00\% & 0.42 & 0.64 & 32\% \\ + B & 13.22\% & 0.33 & 0.53 & 20\% \\ + C & 49.30\% & 0.47 & 0.54 & 26\% \\ \end{tabular} -\caption{User stats} -\label{tab:user-table} -\end{table*} +\label{tab:user-train} +\end{table} + +\begin{table} +\centering +\caption{User Testing} +\begin{tabular}{l|c|c|c|c||c} + User & TP & FP & TN & FN & Accuracy \\ \hline + A & 11 & 13 & 67 & 9 & 78\% \\ + B & 1 & 3 & 96 & 0 & 97\% \\ + C & 44 & 8 & 26 & 22 & 70\% \\ +\end{tabular} +\label{tab:user-test} +\end{table} \section{Discussion} \label{sec:discussion} \subsection{Limitations} \label{sec:limitations} The size of the lewd anime thighs dataset is only 1000 images. This leads to overfitting on the training or the validation set which can be mitigated slightly by applying transformations. The small dataset is due to the time-consuming task of manually cropping and labeling the dataset. Since the model is trying to learn an individual's taste, that individual must label the full dataset. 
+ \subsection{Future Work} \label{sec:futurework} In order to increase the size of the dataset and thereby obtaining a more robust accuracy from the machine learning model, future research in Project Hentai AI will spend some more focus on automating the collection, transformation and labeling of data. -In this study, only boolean labeling was considered when reviewing lewd anime thighs. But even in the world of Hentai thighs are more often than not in a gray-zone as opposed to black or white. A future work in wAiFu would be to extend the labeling application (\emph{Hentai Tinder}) to have a mode or a version capable of using rate labeling on a spectrum. This could be as easy as presenting the user with a 5-star system, similar to reviewing restaurants or hotels, where each image gets rated from 1-5. +In this study, only boolean labeling was considered when reviewing lewd anime thighs. But even in the world of Hentai, thighs are more often than not in a gray-zone as opposed to black or white. A future work in wAiFu would be to extend the labeling application (\emph{Hentai Tinder}) to have a mode or a version capable of using rate labeling on a scale. This could be as easy as presenting the user with a 5-star system, similar to reviewing restaurants or hotels, where each image gets rated from 1-5. + +As mentioned in Section~\ref{sec:datacollection}, the dataset mainly contained lewd feminine thighs. One area of future work would be to investigate the effect of the masculine/feminine feature ratio on the robustness of the model's accuracy. \section{Conclusion} +In Project Hentai AI: wAiFu (Witty Artificial Intelligence Framework Utilization) we established a framework for processing and labeling data using our own newly developed tools. We then used fastai to re-train a Convolutional Neural Network to classify images of lewd anime thighs based on subjective ratings from three individual users with an accuracy of at least 70\%. 
Even though batch transforms were applied to mitigate overfitting, we believe that the dataset could still be too small. The size of the dataset is impacted by the pre-processing overhead (cropping) of the general dataset images, as well as the manual labeling for each new user. \section*{Acknowledgement} -We would like to thank Kittey for coming up with the name of the project: \emph{wAiFu}. We would also like to thank Hood Classic\#0148 for coming up with the name of the labeling app: \emph{Hentai Tinder}. +We would like to thank Kittey for coming up with the name of the project: \emph{wAiFu}. We would also like to thank Hood Classic\#0148 for coming up with the name of the labeling app: \emph{Hentai Tinder}. Finally, we thank the three anonymous users in this study. \bibliography{ref} \bibliographystyle{plain}