Instructions to use cgrumbach/BitcoinPaper with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use cgrumbach/BitcoinPaper with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="cgrumbach/BitcoinPaper")

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("cgrumbach/BitcoinPaper")
model = AutoModelForCausalLM.from_pretrained("cgrumbach/BitcoinPaper")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use cgrumbach/BitcoinPaper with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "cgrumbach/BitcoinPaper"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "cgrumbach/BitcoinPaper", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker
docker model run hf.co/cgrumbach/BitcoinPaper
- SGLang
How to use cgrumbach/BitcoinPaper with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "cgrumbach/BitcoinPaper" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "cgrumbach/BitcoinPaper", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "cgrumbach/BitcoinPaper" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{ "model": "cgrumbach/BitcoinPaper", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'
- Docker Model Runner
How to use cgrumbach/BitcoinPaper with Docker Model Runner:
docker model run hf.co/cgrumbach/BitcoinPaper
| # preprocessing sub functions | |
| import re | |
| import os | |
| import glob | |
| import string | |
| import pandas as pd | |
| from datetime import datetime | |
| import nltk | |
| from nltk.corpus import stopwords | |
| from nltk.stem import WordNetLemmatizer | |
| import contractions | |
def remove_deleted(df):
    """Drop rows whose 'timestamp' value consists only of digits.

    Per the original author's note, a purely numeric timestamp marks a
    deleted post in the crawled data (not verifiable from this file).

    Args:
        df: dataframe of crawled website data with a 'timestamp' column.

    Returns:
        A new dataframe without the numeric-timestamp rows, re-indexed
        from 0. The input dataframe is left untouched.
    """
    # Cast to str before using the .str accessor: with missing or
    # non-string cells, .str.isnumeric() yields NaN and the `~` negation
    # below would raise TypeError. After the cast, missing values are
    # simply "not numeric" and the row is kept.
    numeric_mask = df['timestamp'].astype(str).str.isnumeric()
    return df[~numeric_mask].reset_index(drop=True)
def remove_deleted_post(df):
    """Drop rows whose 'post' value is exactly the string 'del'.

    Only an exact match is removed; posts that merely contain 'del'
    (e.g. 'deleted') are kept.

    Args:
        df: dataframe of crawled website data with a 'post' column.

    Returns:
        A new dataframe without the 'del' rows, re-indexed from 0.
    """
    is_live = df['post'].ne('del')
    return df.loc[is_live].reset_index(drop=True)
def update_lastEdit(df):
    """Fill missing 'last_edit' values with the row's 'timestamp'.

    The dataframe is modified in place and the same object is returned.

    Args:
        df: dataframe of crawled website data with 'timestamp' and
            'last_edit' columns.

    Returns:
        The same dataframe, with NaN entries in 'last_edit' replaced by
        the corresponding 'timestamp' value.
    """
    fallback = df['timestamp']
    filled = df['last_edit'].fillna(fallback)
    df.loc[:, 'last_edit'] = filled
    return df
def preprocess_date(date_str):
    """Replace the word 'Today' in a date string with the current date.

    The substitution only happens when the string contains 'Today '
    (with a trailing space); the current date is rendered as
    '%B %d, %Y', e.g. 'January 05, 2024'.

    Args:
        date_str: str that contains date information. Non-string values
            (e.g. NaN/None from missing cells) are returned unchanged.

    Returns:
        The date string with 'Today' replaced by the current date, or
        the input itself when no substitution applies.
    """
    # Missing cells arrive as float NaN / None; the `in` test below would
    # raise TypeError on them, so pass them through untouched.
    if not isinstance(date_str, str):
        return date_str
    if "Today " in date_str:
        current_date = datetime.now().strftime("%B %d, %Y")
        return date_str.replace("Today", current_date)
    return date_str
def convert_datetime_with_multiple_formats(date_str, formats):
    """Parse a date string by trying several formats in order.

    Args:
        date_str: str that contains date information.
        formats: list of candidate strptime-style formats; the first one
            that parses wins.

    Returns:
        The value produced by pd.to_datetime for the first matching format.

    Raises:
        ValueError: if none of the formats matches (or formats is empty).
    """
    for candidate in formats:
        try:
            parsed = pd.to_datetime(date_str, format=candidate)
        except ValueError:
            # This layout did not fit; fall through to the next one.
            continue
        else:
            return parsed
    raise ValueError(f"Time data {date_str} doesn't match provided formats")
def convert_to_datetime(df_):
    """Convert the 'timestamp' and 'last_edit' columns to date objects.

    Works on a copy, so the input dataframe is not modified. Each column
    first has 'Today' resolved via preprocess_date, is then parsed with
    convert_datetime_with_multiple_formats, and is finally reduced to its
    date part (time of day is dropped).

    Args:
        df_: dataframe of crawled website data.

    Returns:
        A copy of the dataframe whose 'timestamp' and 'last_edit' columns
        hold datetime.date values.
    """
    # The two date layouts handled here ("... at HH:MM:SS AM" and
    # "..., HH:MM:SS AM").
    datetime_formats = ["%B %d, %Y at %I:%M:%S %p", "%B %d, %Y, %I:%M:%S %p"]
    date_columns = ('timestamp', 'last_edit')

    df = df_.copy()

    # Resolve 'Today' in both columns before any parsing takes place.
    for column in date_columns:
        df[column] = df[column].apply(preprocess_date)

    for column in date_columns:
        parsed = df[column].apply(
            convert_datetime_with_multiple_formats, formats=datetime_formats)
        df[column] = parsed.dt.date
    return df
def remove_urls(text):
    """Strip URLs from a text.

    Anything starting with 'http' and running up to the next whitespace
    is deleted; surrounding whitespace is left as is.
    """
    url_pattern = re.compile(r'http\S+')
    return url_pattern.sub('', text)
| # | |
def remove_extra_whitespace(text):
    """Collapse every run of whitespace into a single space.

    Leading and trailing whitespace is removed as well.
    """
    words = text.split()
    return ' '.join(words)
def remove_special_characters(text):
    """Delete every character that is neither a word character nor whitespace.

    Word characters follow the regex \\w class, so letters, digits and
    underscores are kept.
    """
    non_word_pattern = re.compile(r'[^\w\s]')
    return non_word_pattern.sub('', text)
def to_lowercase(text):
    """Return the text converted to lowercase."""
    lowered = text.lower()
    return lowered
def remove_meta_info(text):
    """Strip 'Quote from: <user> on <date> AM|PM' headers from a post.

    The input is converted with str() first, so non-string values are
    accepted and a string is always returned.
    """
    quote_header = r'Quote from: [a-zA-Z0-9_]+ on [a-zA-Z0-9, :]+ (AM|PM)'
    as_text = str(text)
    return re.sub(quote_header, '', as_text)
def tokenize(text):
    """Split a text into a list of whitespace-separated words.

    Any run of whitespace (spaces, tabs, newlines) counts as one
    separator, so the result never contains empty-string tokens; an
    empty or all-whitespace text yields an empty list.
    """
    # split() without an argument, unlike split(' '), does not emit ''
    # tokens for repeated, leading or trailing spaces.
    return text.split()
def remove_sentence_punctuation(text):
    """Replace punctuation with spaces while keeping math symbols.

    Every character of string.punctuation that is not listed among the
    math symbols below is turned into a single space; the text length is
    therefore unchanged.
    """
    math_symbols = "+-×*÷/=()[]{},.<>%^"
    blank_out = {
        ord(mark): ' '
        for mark in string.punctuation
        if mark not in math_symbols
    }
    return text.translate(blank_out)
def lemmatize_text(text):
    """Lemmatize each whitespace-separated word of a text.

    Words are passed one by one to NLTK's WordNetLemmatizer (default
    part of speech) and re-joined with single spaces.
    """
    lemmatizer = WordNetLemmatizer()
    lemmas = map(lemmatizer.lemmatize, text.split())
    return ' '.join(lemmas)
def replace_numbers(text, replace_with="<NUM>"):
    r"""Replace standalone integers in a text with a placeholder.

    A number is a run of digits delimited by word boundaries, so digits
    embedded in a word (e.g. 'abc123') are left alone, while '3.14'
    becomes '<NUM>.<NUM>'.

    Args:
        text: the text to process.
        replace_with: placeholder inserted verbatim for every number
            (default "<NUM>"). Backslashes are not interpreted.

    Returns:
        The text with every standalone number replaced.
    """
    # A callable replacement makes re.sub insert the placeholder
    # literally; a plain string would be parsed as a template, where a
    # backslash either raises re.error or expands as a group reference.
    return re.sub(r'\b\d+\b', lambda _match: replace_with, text)
def remove_stopwords(tokens):
    """Filter NLTK's English stopwords out of a list of tokens.

    The comparison is exact (case-sensitive); token order is preserved.
    """
    english_stopwords = set(stopwords.words('english'))
    kept = []
    for token in tokens:
        if token in english_stopwords:
            continue
        kept.append(token)
    return kept
def expand_contractions(tokens):
    """Expand contractions in each token (e.g. "isn't" -> "is not").

    Every token is passed through contractions.fix; the result is a list
    of the same length as the input.
    """
    return list(map(contractions.fix, tokens))
def remove_repeated_phrases(text):
    """Keep only the first occurrence of every word in a text.

    The text is split on whitespace and each later repetition of a word
    is dropped, wherever it occurs (not only when adjacent); the survivors
    are joined with single spaces.
    eg. "hello hello world" -> "hello world"
    """
    # dict keys are unique and keep insertion order, which gives
    # first-occurrence de-duplication in one step.
    first_occurrences = dict.fromkeys(text.split())
    return ' '.join(first_occurrences)