{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-cTcaAa8YrUf", "outputId": "9fa89f37-96f0-4785-fff2-ef8dc8d3a7ae" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting sentence-transformers\n", " Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)\n", "Collecting num2words\n", " Downloading num2words-0.5.13-py3-none-any.whl.metadata (12 kB)\n", "Collecting deep-translator\n", " Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)\n", "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.42.4)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (4.66.4)\n", "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (2.3.1+cu121)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.26.4)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.3.2)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (1.13.1)\n", "Requirement already satisfied: huggingface-hub>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (0.23.5)\n", "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence-transformers) (9.4.0)\n", "Collecting docopt>=0.6.2 (from num2words)\n", " Downloading docopt-0.6.2.tar.gz (25 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: beautifulsoup4<5.0.0,>=4.9.1 in /usr/local/lib/python3.10/dist-packages (from deep-translator) (4.12.3)\n", "Requirement already satisfied: requests<3.0.0,>=2.23.0 in /usr/local/lib/python3.10/dist-packages (from deep-translator) (2.31.0)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4<5.0.0,>=4.9.1->deep-translator) (2.5)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (3.15.4)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (2024.6.1)\n", "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (24.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (6.0.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence-transformers) (4.12.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.23.0->deep-translator) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.23.0->deep-translator) (3.7)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.23.0->deep-translator) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3.0.0,>=2.23.0->deep-translator) (2024.7.4)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) 
(1.13.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (3.1.4)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)\n", 
"Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n", "Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.7 kB)\n", "Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence-transformers) (2.3.1)\n", "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers)\n", " Using cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (2024.5.15)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (0.4.3)\n", "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers) (0.19.1)\n", "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence-transformers) (3.5.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers) (2.1.5)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n", "Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 
kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading num2words-0.5.13-py3-none-any.whl (143 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.3/143.3 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading deep_translator-1.11.4-py3-none-any.whl (42 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.3/42.3 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hUsing cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", "Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", "Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", "Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", "Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n", "Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", "Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", "Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", "Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", "Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", "Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", "Using cached nvidia_nvjitlink_cu12-12.6.20-py3-none-manylinux2014_x86_64.whl (19.7 MB)\n", "Building wheels for collected packages: docopt\n", " Building wheel for docopt (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13704 sha256=5f4cdbf90c388501c3402aaded6f9321367f6e69e3f3fc84b322f77583029a24\n", " Stored in directory: /root/.cache/pip/wheels/fc/ab/d4/5da2067ac95b36618c629a5f93f809425700506f72c9732fac\n", "Successfully built docopt\n", "Installing collected packages: docopt, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, num2words, nvidia-cusparse-cu12, nvidia-cudnn-cu12, deep-translator, nvidia-cusolver-cu12, sentence-transformers\n", "Successfully installed deep-translator-1.11.4 docopt-0.6.2 num2words-0.5.13 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.6.20 nvidia-nvtx-cu12-12.1.105 sentence-transformers-3.0.1\n" ] } ], "source": [ "!pip install sentence-transformers num2words deep-translator" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "csA5EGF4cV0H", "outputId": "de59180a-318b-40cc-817c-402d2630d161" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n", "[nltk_data] Downloading package wordnet to /root/nltk_data...\n", "[nltk_data] Package wordnet is already up-to-date!\n", "[nltk_data] Downloading package averaged_perceptron_tagger to\n", "[nltk_data] /root/nltk_data...\n", "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n", "[nltk_data] date!\n", "[nltk_data] Downloading package punkt to /root/nltk_data...\n", 
def preprocessing_data(text):
    """Normalise raw CV / job-description text into a space-separated token string.

    Processing steps, in order:
      1. lowercase the text;
      2. delete URLs (anything starting with ``http`` or ``www``);
      3. replace punctuation with spaces;
      4. expand the stand-alone degree abbreviations ``s1``, ``s2``, ``s3``,
         ``d3`` and ``d4`` into English degree names;
      5. spell out stand-alone integers with ``num2words``;
      6. drop every character that is not an ASCII letter, digit or whitespace;
      7. remove the stand-alone word ``etc``;
      8. tokenize, drop NLTK English stopwords and lemmatize each token with
         ``WordNetLemmatizer`` (default part of speech).

    Args:
        text (str): Raw input text.

    Returns:
        str: The remaining lemmatized tokens joined by single spaces.
    """
    degree_names = {
        's1': 'bachelor',
        's2': 'master',
        's3': 'doctorate',
        'd3': 'associate degree',
        'd4': 'professional degree',
    }

    def replace_with_words(match):
        # num2words hyphenates compound numbers ("ninety-five"). Turn the hyphen
        # into a space here, otherwise the ASCII filter below deletes it and
        # fuses the two words into "ninetyfive".
        return num2words(int(match.group())).replace('-', ' ')

    text = text.lower()

    # URLs must go before punctuation is stripped, while each is still one token.
    text = re.sub(r'http\S+|www\S+', '', text)
    text = re.sub(r'[^\w\s]', ' ', text)

    # Word boundaries keep the expansion from firing inside longer tokens
    # (a plain substring replace turned "class1" into "clasbachelor").
    text = re.sub(
        r'\b(?:s1|s2|s3|d3|d4)\b',
        lambda match: degree_names[match.group()],
        text,
    )

    text = re.sub(r'\b\d+\b', replace_with_words, text)

    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    text = text.replace('\n', ' ')
    # Only drop "etc" as a whole word; a substring replace mangled words such
    # as "sketch" and "fetch".
    text = re.sub(r'\betc\b', ' ', text)

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = [
        lemmatizer.lemmatize(word)
        for word in word_tokenize(text)
        if word not in stop_words
    ]

    return ' '.join(tokens)
two thousand thirteen little flower public school august two thousand manipal academy higher education experience company themathcompany description currently working casino based operator macau responsibilities include segmenting customers based value bring company providing data backed insights improved segmentation target marketing strategy skill details data analysis less than one year excel less than one year machine learning less than one year mathematics less than one year python less than one year matlab less than one year electrical engineering less than one year sql less than one year\",\n", " \"CVRIL2 Python Developer\" : \"technical proficiencies platform ubuntu fedora cent os windows database mysql languages python tensorflow numpy c cplusplus education details january two thousand sixteen me computer engineering pune maharashtra savitribai phule pune university january two thousand fourteen be computer engineering pune maharashtra savitribai phule pune university january two thousand ten ryk science college maharashtra state board january two thousand eight maharashtra state board python developer python developer skill details cplusplus experience six months mysql experience six months python experience six months company details company fresher description python programming\",\n", " \"CVRIL3 Sales\" : \"education details bachelors bachelors commerce india guru nanak high school sales manager skill details data entry experience less than one year months cold calling experience less than one year months sales experience less than one year months salesforce experience less than one year months ms office experience less than one year months company details company emperor honda description company honda cars india ltd description worked asm maruti dealership ten years currently working manager sales honda car dealership last five years good sportsmen represent college various cricket tournaments lead nagpur university cricket team also searching job 
car dealership cricket academy\",\n", " \"CVGPT1 Data Science\" : \"I am an analytical and detail-oriented Data Scientist with a strong background in data analysis, visualization, and database management. I possess proficiency in tools such as SQL, Excel, Python, and R, with a keen ability to extract meaningful insights from large datasets. I am an excellent communicator capable of conveying complex data insights to non-technical audiences. As a fresh graduate with outstanding academic performance and hands-on experience in data projects, I am eager to contribute my skills and knowledge to a dynamic team. I hold a Bachelor of Science in Data Science from a reputable university, where I graduated in June 2024 with a GPA of 3.95/4.00. My relevant coursework includes Data Analysis, Data Visualization, SQL, Python Programming, R Programming, and Database Management.\",\n", " \"CVGPT4 FullStack Developer\" : \"Experienced Lead Developer and Fullstack Developer with over five years of professional experience in software development and two years in leadership roles. Proficient in Java, Spring Boot, and React.js, with a strong understanding of RESTful API design and implementation. Adept at managing and mentoring development teams, overseeing the design and development of scalable web applications, and implementing CI/CD pipelines. Excellent leadership, problem-solving, and communication skills, with a proven ability to motivate and inspire team members. Bachelor of Science in Computer Science Reputable University Graduated: June 2018 Relevant Coursework: Software Development, Database Management, Agile Methodologies, Cloud Computing\",\n", " # \"customer service \" : \"I am a dedicated and experienced customer service professional with a background in retail sales. I possess strong communication and problem-solving skills, with a proven ability to handle customer inquiries and drive sales. 
My educational background in history has equipped me with excellent research and analytical skills. I am looking for opportunities that align with my skills and interests, particularly in customer service and sales roles. Experience Customer Service Representative, XYZ Corporation, January 2020 - Present Provide excellent customer service by resolving complaints and answering inquiries. Ensure customer satisfaction by providing accurate information and timely solutions. Document and escalate issues to the relevant departments as needed. Maintain a positive and professional demeanor in all interactions. Retail Sales Associate, ABC Store, June 2018 - December 2019 Assisted customers with product selection and inquiries. Managed inventory and restocked shelves. Handled cash transactions and balanced the register at the end of the day. Participated in merchandising and promotional activities to drive sales. Bachelor of Arts in History, University of Somewhere, Graduated May 2018 Studied various historical periods and developed strong research and analytical skills. Completed a senior thesis on the impact of the Industrial Revolution on modern society.\",\n", " # \"ds - cs gpt\" : \"I am a motivated and detail-oriented professional with experience in data analysis and customer service. My background in economics and hands-on experience with data analysis tools have equipped me with strong analytical and problem-solving skills. I have a proven ability to work collaboratively with cross-functional teams and provide relevant insights to support business processes. I am seeking opportunities that allow me to leverage my skills in data analysis and contribute to impactful projects. Data Analyst Intern, ABC Corporation, June 2022 - Present Assisted in collecting, processing, and analyzing large datasets to extract meaningful insights. Generated monitoring reports to ensure a seamless reporting cycle. 
Collaborated with cross-functional teams to understand data needs and provide relevant insights. Performed data validation to ensure data accuracy and integrity. Utilized data analysis tools such as SQL and Excel to support business processes. Customer Service Representative, XYZ Company, January 2020 - May 2022 Provided exceptional customer service by resolving complaints and answering inquiries. Documented and escalated issues to the relevant departments. Maintained a positive and professional demeanor in all interactions. Developed strong communication and problem-solving skills through daily interactions with customers.\",\n", " # \"marketing gpt\" : \"I am a dedicated and results-driven professional with experience in marketing and sales. My background in communication and hands-on experience with marketing campaigns have equipped me with strong skills in customer service, content creation, and market research. I have a proven ability to develop and maintain client relationships, meet sales targets, and create effective marketing strategies. I am seeking opportunities that allow me to leverage my skills in marketing and sales to drive business growth and success. Marketing Coordinator, ABC Marketing Solutions, January 2018 - Present Coordinated and executed marketing campaigns for various clients. Developed and maintained relationships with clients, ensuring their needs and expectations were met. Conducted market research to identify trends and target audiences. Created content for social media, email campaigns, and websites. Monitored and analyzed campaign performance, making recommendations for improvement. Bachelor of Arts in Communication, University of Nowhere, Graduated May 2015 Studied various aspects of communication, including public relations, advertising, and media studies. Completed coursework in marketing, sales, and consumer behavior. 
Conducted a senior project on the impact of social media on consumer purchasing decisions.\",\n", " # \"fedev gpt\" : \"I am a highly motivated software developer with a strong background in front-end development and a passion for building user-friendly web applications. With over five years of professional experience in the tech industry, I have honed my skills in various programming languages and frameworks. Front-End Developer, Tech Solutions Inc., June 2018 - Present Developed and maintained the front-end of several web applications using React. Collaborated with back-end developers to integrate RESTful APIs. Participated in code reviews and provided constructive feedback to team members. Implemented responsive design principles to ensure optimal user experience across devices. Worked closely with UI/UX designers to translate design prototypes into functional code. Stayed updated with the latest industry trends and technologies. Web Developer, Creative Web Studio, January 2015 - May 2018 Built and maintained websites for various clients using HTML, CSS, and JavaScript. Enhanced website performance and speed through optimization techniques. Conducted usability testing and gathered user feedback to improve website functionality. Assisted in the creation of custom WordPress themes and plugins. Provided technical support and troubleshooting for website issues. Bachelor of Science in Computer Science, State University, Graduated May 2015 Studied core computer science concepts, including data structures, algorithms, and software engineering. Completed coursework in web development, database management, and network security. 
Engaged in team projects that involved the full software development lifecycle.\",\n", " }\n", "\n", "jd = {\n", " \"JDRIL1 IT Project Manager\" : \"qualifications minimum one year work experience project manager hold certification related project management professional pmp experience managing agile waterfall methodology projects banking area familiar using scrum method familiar able work collaboration tools monitor report achievement knowledge sdlc methodology knowledge iot technology good analytical thinking problem solving skill attention detail capability project management skill strong communication collaboration skills especially across customer countries good command english spoken written willing travel customer site job description lead manage various company strategic projects banking project implement execute pmbok project management body knowledge includes cost benefits calculations identify manage project risk others perform discipline monitoring achieve challenging targets quality deliveries per milestone monitor bast invoicing achievable per task milestone project manage multiple simultaneous projects indonesia per scope schedule budget followed high level customer satisfaction responsible success project delivery point escalation team customer issues pertaining success project partner customer internal technical teams resolve issue provide reporting management stakeholders project update progress financial perform risk assessment provide feedback potential issues site visit discussion stakeholders BAYU WICAKSONO qualifications minimum one year work experience project manager hold certification related project management professional pmp experience managing agile waterfall methodology projects banking area familiar using scrum method familiar able work collaboration tools monitor report achievement knowledge sdlc methodology knowledge iot technology good analytical thinking problem solving skill attention detail capability project management skill 
strong communication collaboration skills especially across customer countries good command english spoken written willing travel customer site job description lead manage various company strategic projects banking project implement execute pmbok project management body knowledge includes cost benefits calculations identify manage project risk others perform discipline monitoring achieve challenging targets quality deliveries per milestone monitor bast invoicing achievable per task milestone project manage multiple simultaneous projects indonesia per scope schedule budget followed high level customer satisfaction responsible success project delivery point escalation team customer issues pertaining success project partner customer internal technical teams resolve issue provide reporting management stakeholders project update progress financial perform risk assessment provide feedback potential issues site visit discussion stakeholders\",\n", " \"JDRIL2 Solution Architect - Data/AI\" : \"we seeking experienced solution architect lead design implementation innovative data ai solutions role collaborate stakeholders understand business technical requirements architect optimal data ai solutions responsibilities gather business technical requirements stakeholders design end end data ai solutions develop technical architecture including data stack architecture analytics environment ai determine technical feasibility validate designs against requirements select appropriate technologies considering cost scalability ease integration coordinate data scientist data engineer ai engineer etc architect data pipelines analytics architecture machine learning models apply statistical predictive modeling techniques extract insights communicate complex architecture trade offs executives stakeholders stay date technologies trends mining natural resources requirements two years experience solutions architect similar role five years experience data scientist preferable two more data 
platform project two more ai adoption project gen ai preferable strong statistical modeling machine learning data science skills knowledge data infrastructure pipelines storage visualization ability develop detailed technical requirements business needs excellent communication strategic thinking abilities comfortable explaining complex architectures trade offs bs ms computer science statistics analytics related field\",\n", " \"JDRIL3 Data Science Intern\" : \"responsibility collect process analyze large datasets extract meaningful insights trends generate monitoring report needed ensure seamless monitoring reporting cycle work cross functional teams understand data needs provide relevant insights perform data validation ensure data accuracy integrity collaborate it teams ensure availability reliability data creating database schemas represent support business process stay date industry trends best practices data analysis visualization requirement bachelors degree data science reputable university fresh graduate outstanding performance welcome proficiency data analysis tools software sql excel python r similar experience data visualization tools tableau power bi similar strong analytical problem solving skills excellent attention detail accuracy good communication interpersonal skills ability convey complex data insights non technical audiences\",\n", " \"JDRIL4 SALES\" : \"Develop and implement marketing plans for each channel which include crafting of content, designing advertisements, and identifying target audiences Monitor competitors' offerings to resolve how they might affect business performance Review competitor's pricing tactics to ensure that we are competitive in the market Work with vendors in making sure products are delivered timely and meet quality standards Orchestrate new programs to drive sales across multiple channels Develop strong and sustainable relationships with key accounts or other distribution channels Run day-to-day operations of a 
channel, including handling inventory levels, liaising with vendors, and providing customer service to customers Identify new opportunities within a channel that could increase sales through existing customers or help attract new customers to the company's products or services Managing relationships with customers to ensure satisfaction with products and services offered by the company How will you get here? 5+ years of proven experience in business development, distributor partner management, or other customer facing commercial roles Deep understanding of the Laboratory Product market in Indonesia Proficient in English; both verbal and written to communicate with English-speaking business associates Strong communicator, influencing and effective presentation skills Able to collaborate in a matrixed environment working with team with varied strengths Able to travel when needed\",\n", " \"JDRIL5 Data Engineer\" : \"about responsibilities role perform data exploration data cleaning data imputation feature engineering unstructured structured data build infrastructure optimal extraction transformation loading etl data wide variety data sources develop maintain optimal data pipeline architecture training statistical machine learning models regression classification develop maintain evaluations measure effectiveness training data includes measuring capabilities models variety tasks domains collaborate data scientists machine learning engineers develop comprehensive data science machine learning solution pipeline requirements need minimum qualifications bachelors degree computer science related fields equivalent software engineering experience proficiency python programming language experience dataset processing feature engineering using tools numpy pandas scikit learn visualization skills using tools matplotlib seaborn bokeh understanding deep learning frameworks pytorch tensorflow understanding sql nosql understands hadoop spark kafka hive presto proficiency source 
control ie git preferred make stand crowd preferred qualifications deep understanding object oriented programming oop concepts inheritance delegation abstract class understanding cloud native technologies aws gcp azure experience using docker experience using aws services s three ec two glue sagemaker experience aws step function aws lambda better proficiency scala java programming languages enjoy iterating quickly research prototypes learning new technologies\",\n", " \"JDRIL6 FullStack Developer\" : \"lead developer fullstack developer using java springboot react job description lead manage team developers providing technical guidance mentorship oversee design development implementation scalable web applications manage prioritize development pipeline monitor evaluate progress implement maintain ci cd pipelines using tools jenkins similar devops tools facilitate agile ceremonies sprint planning daily stand ups retrospectives identify address technical challenges bottlenecks within team resolve incidents ensure system availability within sla conduct code reviews provide constructive feedback team members foster collaborative innovative team environment stay updated emerging technologies industry trends drive continuous improvement education bachelor higher degree computer science related fields experience needed long duration needed experience length service five years professional experience software development least two years leadership role additional expertises need proficiency fullstack development expertise technologies spring boot react js strong understanding restful api design implementation experience databases oracle oracle pl sql familiarity version control systems git ci cd pipelines knowledge agile methodologies practices excellent leadership team management abilities strong problem solving skills attention detail effective communication interpersonal skills ability motivate inspire team members experience cloud services aws azure google cloud 
# Pre-processing: run every CV / JD text through preprocessing_data,
# keeping the original document labels as dictionary keys.
preprocessed_cv = dict((label, preprocessing_data(text)) for label, text in cv.items())
preprocessed_jd = dict((label, preprocessing_data(text)) for label, text in jd.items())

# Embed each pre-processed document; convert_to_tensor=True asks the model
# for tensor output so the embeddings can be fed to the similarity utility.
encoded_cv = dict(
    (label, model.encode(cleaned, convert_to_tensor=True))
    for label, cleaned in preprocessed_cv.items()
)
encoded_jd = dict(
    (label, model.encode(cleaned, convert_to_tensor=True))
    for label, cleaned in preprocessed_jd.items()
)
# %% Helper shared by both similarity cells (previously the loop was copy-pasted).
def print_similarity_scores(cv_embeddings, jd_embeddings):
    """Print the cosine similarity of every CV embedding against every JD embedding.

    Args:
        cv_embeddings: mapping of CV label -> embedding, as produced by model.encode.
        jd_embeddings: mapping of JD label -> embedding, as produced by model.encode.

    Returns:
        None. One line is printed per (CV, JD) pair, CVs in the outer loop and
        JDs in the inner loop, with the score formatted to four decimals.
    """
    for cv_key, cv_emb in cv_embeddings.items():
        for jd_key, jd_emb in jd_embeddings.items():
            # .item() turns the single-element similarity result into a plain number
            similarity_score = util.pytorch_cos_sim(cv_emb, jd_emb).item()
            print(f"Similarity score between CV '{cv_key}' and JD '{jd_key}': {similarity_score:.4f}")


# %% Similarity scores on the pre-processed texts.
print_similarity_scores(encoded_cv, encoded_jd)

# %% Skip pre-processing: embed the raw CV / JD texts directly.
# These go under their own names so the pre-processed embeddings in
# encoded_cv / encoded_jd are not overwritten; re-running the cell above
# therefore always reports the pre-processed scores, whatever the run order.
encoded_cv_raw = {key: model.encode(value, convert_to_tensor=True) for key, value in cv.items()}
encoded_jd_raw = {key: model.encode(value, convert_to_tensor=True) for key, value in jd.items()}

# %% Similarity scores on the raw (not pre-processed) texts.
print_similarity_scores(encoded_cv_raw, encoded_jd_raw)
"execution_count": null, "metadata": { "id": "WXJ7FWafHu9M" }, "outputs": [], "source": [] } ], "metadata": { "colab": { "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }