| package parser |
|
|
| import ( |
| "fmt" |
| "regexp" |
| "strconv" |
| "strings" |
| "time" |
|
|
| "github.com/araddon/dateparse" |
| "golang.org/x/net/html" |
| dt "gorm.io/datatypes" |
|
|
| "github.com/metatube-community/metatube-sdk-go/common/convertor" |
| ) |
|
|
| |
// ParseInt converts s to an int, ignoring surrounding whitespace.
//
// The conversion is best-effort: the error from strconv.Atoi is discarded,
// so input that is not a valid base-10 integer (including the empty string)
// yields 0.
func ParseInt(s string) int {
	// Error intentionally dropped; callers only want a usable number.
	value, _ := strconv.Atoi(strings.TrimSpace(s))
	return value
}
|
|
| |
| func ParseTime(s string) time.Time { |
| s = strings.TrimSpace(s) |
| if ss := regexp.MustCompile(`([\s\d]+)年([\s\d]+)月([\s\d]+)日`). |
| FindStringSubmatch(s); len(ss) == 4 { |
| s = fmt.Sprintf("%s-%s-%s", |
| strings.TrimSpace(ss[1]), |
| strings.TrimSpace(ss[2]), |
| strings.TrimSpace(ss[3])) |
| } |
| t, _ := dateparse.ParseAny(s) |
| return t |
| } |
|
|
| |
| func ParseDate(s string) dt.Date { |
| return dt.Date(ParseTime(s)) |
| } |
|
|
| |
| func ParseDuration(s string) time.Duration { |
| s = convertor.ReplaceSpaceAll(s) |
| s = strings.ToLower(s) |
| s = strings.ReplaceAll(s, "秒", "s") |
| s = strings.ReplaceAll(s, "分", "m") |
| s = strings.ReplaceAll(s, "時", "h") |
| s = strings.ReplaceAll(s, "时", "h") |
| s = strings.ReplaceAll(s, "sec", "s") |
| s = strings.ReplaceAll(s, "min", "m") |
| if ss := regexp.MustCompile(`(?i)(\d+):(\d+):(\d+)`).FindStringSubmatch(s); len(ss) > 0 { |
| s = fmt.Sprintf("%02sh%02sm%02ss", ss[1], ss[2], ss[3]) |
| } else if ss := regexp.MustCompile(`(?i)(\d+):(\d+)`).FindStringSubmatch(s); len(ss) > 0 { |
| s = fmt.Sprintf("%02sm%02ss", ss[1], ss[2]) |
| } else if ss := regexp.MustCompile(`(?i)(\d+[mhs]?)`).FindAllStringSubmatch(s, -1); len(ss) > 0 { |
| ds := make([]string, 0, 3) |
| for _, d := range ss { |
| ds = append(ds, d[1]) |
| } |
| s = strings.Join(ds, "") |
| } |
| d, _ := time.ParseDuration(s) |
| return d |
| } |
|
|
| |
| func ParseRuntime(s string) int { |
| return int(ParseDuration(s).Minutes()) |
| } |
|
|
| |
// ParseScore extracts a numeric score from s.
//
// Every occurrence of the "点" marker is removed, the remainder is split
// on whitespace, and the first token is parsed as a float64. If no token
// remains the score is 0. The error from strconv.ParseFloat is discarded,
// so a first token that is not a number also yields 0.
func ParseScore(s string) float64 {
	tokens := strings.Fields(strings.ReplaceAll(s, "点", ""))
	if len(tokens) > 0 {
		// Tokens produced by strings.Fields never carry surrounding
		// whitespace, so the first one can be parsed as-is.
		score, _ := strconv.ParseFloat(tokens[0], 64)
		return score
	}
	return 0
}
|
|
| |
| func ParseTexts(n *html.Node, texts *[]string) { |
| if n.Type == html.TextNode { |
| if text := strings.TrimSpace(n.Data); text != "" { |
| *texts = append(*texts, text) |
| } |
| } |
| for n := n.FirstChild; n != nil; n = n.NextSibling { |
| ParseTexts(n, texts) |
| } |
| } |
|
|
// ParseActorNames splits s into individual names.
//
// The string is cut at each list separator ('、', ';', ',') and at each
// opening or closing parenthesis, in both ASCII and fullwidth form, so an
// alias written as "Name (Alias)" yields two entries. Every piece is
// trimmed of surrounding whitespace and empty pieces are skipped. The
// result is nil when no name is found.
func ParseActorNames(s string) (names []string) {
	var buf strings.Builder
	// flush emits the name accumulated so far, if any, and starts a new one.
	flush := func() {
		if name := strings.TrimSpace(buf.String()); name != "" {
			names = append(names, name)
		}
		buf.Reset()
	}
	for _, r := range s {
		switch r {
		// The fullwidth parentheses are written as escapes so they cannot
		// be confused with (or normalized into) their ASCII counterparts;
		// listing the same rune twice in a switch is a compile error.
		// NOTE(review): fullwidth forms are presumed to be the intended
		// second member of each parenthesis pair — confirm.
		case '、', ';', ',', '(', '\uFF08', ')', '\uFF09':
			flush()
		default:
			buf.WriteRune(r)
		}
	}
	flush()
	return names
}
|
|
// idNumberPattern captures an identifier prefix (optional digits followed
// by one or more uppercase letters) and the digits immediately after it.
// Compiled once at package scope instead of on every ParseIDToNumber call.
var idNumberPattern = regexp.MustCompile(`(\d*[A-Z]+)(\d+)`)

// ParseIDToNumber converts a compact ID such as "abc123" into the
// hyphenated form "ABC-123".
//
// s is uppercased first. If it contains a prefix of optional digits and
// letters directly followed by digits, only that first match is returned
// as "PREFIX-DIGITS"; any surrounding text is dropped. Otherwise the
// uppercased input is returned unchanged.
func ParseIDToNumber(s string) string {
	s = strings.ToUpper(s)
	if m := idNumberPattern.FindStringSubmatch(s); len(m) >= 3 {
		return m[1] + "-" + m[2]
	}
	return s
}
|
|
// bustCupSizePattern matches a whole string made of digits, at most one
// whitespace character, and a single uppercase letter, e.g. "88E" or "88 E".
// Compiled once at package scope instead of on every ParseBustCupSize call.
var bustCupSizePattern = regexp.MustCompile(`^(\d+)\s?([A-Z])$`)

// ParseBustCupSize splits a measurement such as "88E" into its numeric
// part and its single-letter suffix.
//
// The entire string must match: digits, an optional single whitespace
// character, then exactly one uppercase ASCII letter. On success it
// returns the number, the letter, and a nil error. It returns 0, "" and a
// non-nil error when s does not have that shape or when the digits do not
// fit in an int (the strconv error is wrapped).
func ParseBustCupSize(s string) (int, string, error) {
	m := bustCupSizePattern.FindStringSubmatch(s)
	if len(m) != 3 {
		return 0, "", fmt.Errorf("invalid format: %s", s)
	}

	digits, letter := m[1], m[2]
	// The pattern guarantees digits only, so Atoi can fail solely on overflow.
	value, err := strconv.Atoi(digits)
	if err != nil {
		return 0, "", fmt.Errorf("failed to parse numeric part '%s': %w", digits, err)
	}
	return value, letter, nil
}
|
|